def preprocess_income_data(label_path, edge_path, x_path, y_path,
                           edge_list_path):
    """Build and persist the income dataset from a label file and an edge file.

    Reads the labels and the edge list, inner-joins them on ``out_id``, hands
    the joined frame to ``preprocess_data`` and writes the three results
    through ``utils``.

    :param label_path: space-separated file with exactly two columns, which
        are renamed to ``out_id`` and ``mean_income``.
    :param edge_path: comma-separated file with exactly two columns, which
        are renamed to ``out_id`` and ``in_id``.
    :param x_path: destination handed to ``utils.persist_data`` for ``X``.
    :param y_path: destination handed to ``utils.persist_data`` for ``y``.
    :param edge_list_path: destination handed to ``utils.persist_edgelist``.
    """
    # NOTE(review): read_csv is called with its default header handling, so
    # the first line of each file is consumed as a header before the columns
    # are renamed -- confirm both inputs really start with a header row.
    targets = pd.read_csv(label_path, sep=' ')
    targets.columns = ['out_id', 'mean_income']
    # Single-argument print(...) parses on both Python 2 and Python 3; the
    # double space reproduces the output of the former print statement.
    print('target labels of shape:  {}'.format(targets.shape))

    edges = pd.read_csv(edge_path)
    edges.columns = ['out_id', 'in_id']
    print('edge list of shape:  {}'.format(edges.shape))

    # 'out_id' is the only column the two frames share, so naming it (and the
    # default inner join) explicitly does not change the result.
    all_data = edges.merge(targets, on='out_id', how='inner')
    print('all data of shape:  {}'.format(all_data.shape))

    X, y, edge_list = preprocess_data(all_data)
    utils.persist_edgelist(edge_list, edge_list_path)
    utils.persist_data(x_path, y_path, X, y)
    def weatherbit(self):
        """Join weatherbit features with the energy target and persist them.

        Loads ``tsdb_weatherbit_cleaned`` and ``tsdb_sma_energy``, merges them
        on their ``datetime`` index, keeps only rows whose ``energy`` value is
        strictly below the 0.99 quantile, converts the result with
        ``self.to_DataFrame`` and writes it to ``self.table``.
        """
        target = 'energy'

        weather = read_data('tsdb_weatherbit_cleaned')
        energy = read_data('tsdb_sma_energy')

        # Both frames need the same index name so the merge can key on it.
        for frame in (weather, energy):
            frame.index.name = 'datetime'

        joined = weather.merge(energy, on='datetime')

        # Trim the upper tail: rows at or above the 0.99 quantile are dropped.
        cutoff = joined[target].quantile(0.99)
        below_cutoff = joined[target] < cutoff
        trimmed = joined[below_cutoff]

        result = self.to_DataFrame(trimmed, self.features, target)
        persist_data(self.table, result)
    def mers_cams_data(self):
        """Join CAMS, MERRA and hourly energy data and persist the result.

        Loads ``tsdb_fronius_energy``, ``tsdb_cams`` and ``tsdb_merra``,
        resamples the energy frame to ``1H`` means, merges all three on their
        ``datetime`` index and derives ``consumption`` as the sum of
        ``FromGenToConsumer`` and ``FromGridToConsumer``. Rows whose
        consumption is not strictly below the 0.99 quantile are dropped before
        the frame is converted with ``self.to_DataFrame`` and written to
        ``self.table``.
        """
        target = 'consumption'

        energy = read_data('tsdb_fronius_energy')
        cams = read_data('tsdb_cams')
        merra = read_data('tsdb_merra')

        # Give every frame the same index name so the merges can key on it.
        for frame in (energy, cams, merra):
            frame.index.name = 'datetime'

        hourly_energy = energy.resample('1H').agg('mean')

        joined = cams.merge(hourly_energy, on='datetime').merge(
            merra, on='datetime')
        from_generator = joined['FromGenToConsumer']
        from_grid = joined['FromGridToConsumer']
        joined[target] = from_generator + from_grid

        # Trim the upper tail: rows at or above the 0.99 quantile are dropped.
        cutoff = joined[target].quantile(0.99)
        below_cutoff = joined[target] < cutoff
        trimmed = joined[below_cutoff]

        result = self.to_DataFrame(trimmed, self.features, target)
        persist_data(self.table, result)
def preprocess_income_data():
    """Build and persist the income dataset from its fixed on-disk location.

    Reads ``users-income`` (space-separated) and ``users_friends.csv`` from
    the income dataset directory, inner-joins them on ``fan_id``, hands the
    joined frame to ``preprocess_data`` and writes ``income.edgelist``,
    ``X.p`` and ``y.p`` back into the same directory through ``utils``.
    """
    # All inputs and outputs live in one directory; naming it once avoids
    # five copies of the same long literal drifting apart.
    data_dir = ('local_resources/Socio_economic_classification_data/'
                'income_dataset/')

    # NOTE(review): read_csv is called with its default header handling, so
    # the first line of each file is consumed as a header before the columns
    # are renamed -- confirm both inputs really start with a header row.
    targets = pd.read_csv(data_dir + 'users-income', sep=' ')
    targets.columns = ['fan_id', 'mean_income']
    # Single-argument print(...) parses on both Python 2 and Python 3; the
    # double space reproduces the output of the former print statement.
    print('target labels of shape:  {}'.format(targets.shape))

    edges = pd.read_csv(data_dir + 'users_friends.csv')
    edges.columns = ['fan_id', 'star_id']
    print('edge list of shape:  {}'.format(edges.shape))

    # 'fan_id' is the only column the two frames share, so naming it (and the
    # default inner join) explicitly does not change the result.
    all_data = edges.merge(targets, on='fan_id', how='inner')
    print('all data of shape:  {}'.format(all_data.shape))

    X, y, edge_list = preprocess_data(all_data)
    utils.persist_edgelist(edge_list, data_dir + 'income.edgelist')
    utils.persist_data(data_dir + 'X.p', data_dir + 'y.p', X, y)
def save_response(resp):
    """Persist ``resp`` to the ``tsdb_sma_energy`` table via ``persist_data``."""
    table = 'tsdb_sma_energy'
    persist_data(table, resp)
 def persist_predictions(self, target, predictions):
     """Persist ``predictions`` to the ``tsdb_<target>_predictions`` table.

     The table name is ``target`` wrapped in the ``tsdb_`` prefix and the
     ``_predictions`` suffix; ``persisted`` is printed once the write call
     returns.
     """
     destination = ''.join(('tsdb_', target, '_predictions'))
     persist_data(destination, predictions)
     print("persisted")
 def persist_data(self, table, dataframe):
     """Forward ``table`` and ``dataframe`` to the ``persist_data`` function.

     The bare name ``persist_data`` in the body is looked up in the enclosing
     function/module scope, not on ``self``, so the call does not recurse into
     this method (assuming this ``def`` sits in a class body -- the class
     header is not visible here; confirm).
     """
     persist_data(table, dataframe)