Ejemplo n.º 1
0
def get_train_data(df, train_size=1):
    """Build sliding-window features and multi-step CPU labels from ``df``.

    Features: the four metric columns plus a ``wavelet(df)`` column are
    back-filled, passed column by column through ``smooth`` (each result is
    also plotted with matplotlib), then min-max scaled per column. Sample
    ``k`` is the window of ``FEED_LEN`` consecutive feature rows that ends
    just before row ``FEED_LEN + k``.

    Labels: for that same row, the ``PREDICT_LEN`` values of
    ``'AWS/EC2 CPUUtilization'`` starting at the row itself (``lag0`` is the
    current row), forward-filled, min-max scaled per lag column and finally
    mapped through ``(x + 1) / 2``.

    Args:
        df: DataFrame holding at least the four metric columns named below.
        train_size: Forwarded to ``train_test_split`` when it is below 1;
            any value ``>= 1`` disables the split.

    Returns:
        ``(features, labels, x_ts, y_ts)``. Without a split ``x_ts`` and
        ``y_ts`` are ``np.array([None])`` placeholders. With a split,
        ``features``/``labels`` are the training part and ``x_ts``/``y_ts``
        the held-out features/labels (chronological, no shuffling).
    """
    metric_cols = [
        'AWS/EC2 NetworkIn', 'AWS/EC2 NetworkOut',
        'System/Linux MemoryUtilization', 'AWS/EC2 CPUUtilization',
    ]

    # Feature generation
    feature_frame = df[metric_cols].copy()
    feature_frame['wavelet'] = wavelet(df)
    # .bfill() replaces the deprecated fillna(method='bfill') spelling.
    feature_frame = feature_frame.bfill()
    for column in feature_frame.columns:
        feature_frame[column] = smooth(feature_frame[column].values).tolist()
        # Side effect kept from the original: show every smoothed series.
        plt.plot(feature_frame[column])
        plt.show()
    # Per-column min-max scaling.
    feature_frame -= feature_frame.min()
    feature_frame /= feature_frame.max()

    # One label column per prediction step: lag<k> is the CPU value k rows
    # ahead of the current row.
    cpu = df['AWS/EC2 CPUUtilization']
    label_frame = pd.DataFrame()
    for lag in range(PREDICT_LEN):
        label_frame['lag{0}'.format(lag)] = cpu.shift(-lag)
    # shift(-lag) leaves NaN at the tail; carry the last known value forward.
    label_frame = label_frame.ffill()
    label_frame -= label_frame.min()
    label_frame /= label_frame.max()

    values = feature_frame.values
    # The first FEED_LEN rows have no complete feature window, so drop them.
    labels = (label_frame.values[FEED_LEN:] + 1) / 2

    # Drop the frames before the (potentially large) window array is built.
    del label_frame
    del feature_frame

    features = np.asarray(
        [values[i - FEED_LEN:i] for i in range(FEED_LEN, values.shape[0])]
    )
    del values

    x_ts, y_ts = np.array([None]), np.array([None])
    if train_size < 1:
        # train_test_split returns (X_train, X_test, y_train, y_test); unpack
        # in that order so `labels` really holds the training labels.
        features, x_ts, labels, y_ts = train_test_split(
            features, labels, train_size=train_size, shuffle=False)
    return features, labels, x_ts, y_ts
Ejemplo n.º 2
0
def generate_thread(name, label, queue):
    """Worker loop that converts queued items into lines of an output file.

    While the shared global ``count`` has not exceeded ``MAX_FILE_NUMBER``,
    take an item from ``queue``, run ``wavelet.wavelet`` on it, format the
    enumerated result with ``format_output`` and append the line to the file
    ``thread_<name>_<label>``. ``count`` is incremented under
    ``output_lock`` and reset to 0 once the loop ends.

    Args:
        name: Identifier used in the output file name and progress messages.
        label: Passed to ``format_output`` and used in the file name.
        queue: Object whose ``get()`` yields the next item to process.
    """
    global count
    # The with-block guarantees the file is closed even when wavelet(),
    # format_output() or the write raises.
    with open("thread_%s_%s" % (name, label), "w") as output_file:
        # NOTE(review): `count` is read here without holding output_lock, so
        # several workers may pass the check at the same time -- confirm
        # that a small overshoot is acceptable.
        while count <= MAX_FILE_NUMBER:
            # A single pre-formatted string prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print('job  %s' % (name,))
            # NOTE(review): presumably a blocking Queue.get(); a worker
            # would wait forever on an empty queue -- verify with the caller.
            f = queue.get()
            dimension = wavelet.wavelet(f)
            line = format_output(enumerate(dimension), label)
            output_file.write(line)
            with output_lock:
                count += 1
                print('%s %s' % (count, f))

        # Reset the shared counter only after a normal loop exit, as before.
        count = 0
Ejemplo n.º 3
0
def generate_thread(name, label, queue):
    """Consume items from ``queue`` and write one formatted line per item.

    The loop runs while the global ``count`` is at most ``MAX_FILE_NUMBER``.
    Each item is passed to ``wavelet.wavelet``; the enumerated result and
    ``label`` go to ``format_output`` and the returned line is written to
    ``thread_<name>_<label>``. ``count`` is incremented while holding
    ``output_lock`` and set back to 0 when the loop finishes.

    Args:
        name: Worker name; appears in the file name and the progress output.
        label: Label forwarded to ``format_output``; also in the file name.
        queue: Source of work items, read through ``queue.get()``.
    """
    global count
    # Using a context manager closes the file on every exit path, including
    # exceptions raised while processing an item.
    with open("thread_%s_%s" % (name, label), "w") as output_file:
        # NOTE(review): the loop condition reads `count` outside of
        # output_lock; concurrent workers can therefore overshoot the limit
        # slightly -- confirm this is intended.
        while count <= MAX_FILE_NUMBER:
            # Pre-formatted so the statement is valid (and prints the same
            # text) on both Python 2 and Python 3.
            print('job  %s' % (name,))
            # NOTE(review): get() is called without a timeout; presumably it
            # blocks when the queue is empty -- verify against the caller.
            f = queue.get()
            dimension = wavelet.wavelet(f)
            line = format_output(enumerate(dimension), label)
            output_file.write(line)
            with output_lock:
                count += 1
                print('%s %s' % (count, f))

        # Same as the original: the counter is reset only on normal exit.
        count = 0
Ejemplo n.º 4
0
    def __init__(self, pic_dir='src/img.jpg', data=None):
        """Load the picture (or debug data) and set the fixed parameters.

        Args:
            pic_dir: Path of the image that is opened when ``data`` is None.
            data: Optional array used instead of the image file (debug
                only). It gets a new leading axis via ``np.expand_dims``.
        """
        super().__init__()
        if data is None:
            # Close config.json deterministically instead of leaking the
            # handle returned by a bare open().
            with open('config.json') as config_file:
                self.config = json.load(config_file)
            # transpose(2, 0, 1) moves the last axis to the front
            # (presumably HWC -> CHW -- TODO confirm).
            self.PIC = np.array(Image.open(pic_dir)).transpose(2, 0, 1)
        else:
            # Only for debug
            # NOTE(review): self.config is not set on this path.
            self.PIC = np.expand_dims(data, axis=0)
        # Define some parameters
        self.BLOCK_SIZE = 5
        self.BLK_STRIDE = 3
        self.WINDOW_SIZE = 25
        self.TH = 2500
        self.MAX_COUNT = 400
        self.WAVELET = wavelet()

        self.ASSEMBLE_DICT = {}
Ejemplo n.º 5
0
    data = (spio.loadmat(HomePath+FlickDir+'3_ScalColor256'))['data']
elif FeaType == 4:
    data = (spio.loadmat(HomePath+FlickDir+'4_HomoText43'))['data']
elif FeaType == 5:
    data = (spio.loadmat(HomePath+FlickDir+'5_EdgeHist150'))['data']
# Create the result CSV for this feature type and write only its header row.
OutFile = OutPath + str(FeaType) + '_0605.csv'
headers = 'qid NumMachine NumForEach k LevelRs Pivots RepeatTime NaiveCost Cost QCost'.split()
# The with-block closes the file even if DictWriter/writeheader raises.
# 'wb' is the csv-module convention on Python 2 (this script uses xrange).
with open(OutFile, 'wb') as fout:
    dw = csv.DictWriter(fout, headers, restval='NULL')
    dw.writeheader()

# Fixed seed before sampling (presumably random.seed/random.sample, making
# QList reproducible -- confirm against the imports).
seed(302)
FeaLen = data.shape[1]
Total = data.shape[0]
W = wavelet(FeaLen).T
RepeatTime = 30
# RepeatTime distinct row indices of `data`.
QList = sample(xrange(Total), RepeatTime)

#################################################################################
# Parameters to be tuned.

kList = [1, 5, 10, 15, 20]
NumForEachList = [100, 200, 300, 400, 500]
NumMachList = [100, 500, 1000, 1500, 2000]
# Alternative minimal settings (disabled):
# kList = [2]
# NumMachList = [2]
# NumForEachList = [2]
#################################################################################
Ejemplo n.º 6
0
                name, ext = os.path.splitext(filename)

                img = tv(image)

                os.makedirs('denoise/tv', exist_ok=True)

                imsave('denoise/tv/%s.png' % name, img)

        if sys.argv[2] == 'wavelet':
            # Run wavelet() on every path given after the mode argument and
            # save each result as denoise/wavelet/<file stem>.png.
            for image in sys.argv[3:]:
                name, ext = os.path.splitext(os.path.basename(image))
                img = wavelet(image)
                os.makedirs('denoise/wavelet', exist_ok=True)
                imsave('denoise/wavelet/%s.png' % name, img)

        if sys.argv[2] == 'wiener':
            # Run wiener() on every path given after the mode argument and
            # save each result as denoise/wiener/<file stem>.png.
            for image in sys.argv[3:]:
                name, ext = os.path.splitext(os.path.basename(image))
                img = wiener(image)
                os.makedirs('denoise/wiener', exist_ok=True)
                imsave('denoise/wiener/%s.png' % name, img)