Example #1
  def get_people(self):
    yield chair_info(CHAIR_URL)
    for row in csv_reader(COUNCIL_PAGE, header=True, headers={'Cookie': 'incap_ses_168_68279=7jCHCh608QQSFVti3dtUAviu/1IAAAAAIRf6OsZL0NttnlzANkVb6w=='}):

      p = Legislator(
        name='%(FirstName0)s %(LastName0)s' % row,
        post_id='%(MUNIC)s Ward %(WARDNUM)s' % row,
        role='Councillor',
      )
      p.add_contact('email', row['email0'], None)
      p.add_contact('voice', row['Phone0'], 'legislature')
      p.add_extra('boundary_url', '/boundaries/%s-wards/ward-%s/' % (row['MUNIC'].lower(), row['WARDNUM']))
      p.add_source(COUNCIL_PAGE)
      yield p

      if row['FirstName1'].strip():
        p = Legislator(
          name='%s %s' % (row['FirstName1'], row['LastName1']),
          post_id='%(MUNIC)s Ward %(WARDNUM)s' % row,
          role='Councillor',
        )
        p.add_contact('email', row['email1'], None)
        p.add_contact('voice', row['Phone1'], 'legislature')
        p.add_extra('boundary_url', '/boundaries/%s-wards/ward-%s/' % (row['MUNIC'].lower(), row['WARDNUM']))
        p.add_source(COUNCIL_PAGE)
        yield p
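
The csv_reader here is the scraper's own helper and is not shown. A minimal sketch of what it presumably does for this call (fetch a remote CSV, pass extra HTTP headers such as the Cookie above, and yield each row as a dict when header=True); the names and details below are assumptions, not the project's actual implementation:

import csv
import io

import requests


def csv_reader(url, header=False, headers=None):
    # Hypothetical sketch: download the CSV, optionally forwarding extra HTTP headers.
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    text = io.StringIO(response.text)
    if header:
        # With header=True, rows come back as dicts, which is what
        # '%(FirstName0)s %(LastName0)s' % row expects.
        yield from csv.DictReader(text)
    else:
        yield from csv.reader(text)
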
Example #2
  def get_people(self):
    reader = csv_reader(COUNCIL_PAGE, header=True)
    for row in reader:
      kwargs = {'role': 'candidate'}
      email = None
      links = []
      extra = {}
      offices = []

      for k, v in row.items():
        v = v.strip()
        if not v:
          continue

        k = k.strip()
        match = re.search(r'\AOffice (\d): ', k)
        if match:
          index = int(match.group(1))
          while index > len(offices):
            offices.append({})
          if k[10:] == 'Type':
            offices[index - 1]['note'] = v
          elif k[10:] in CONTACT_TYPE_KEYS:
            offices[index - 1][CONTACT_TYPE_KEYS[k[10:]]] = v
          else:
            raise Exception(k)
        elif k == 'Party Name':
          kwargs['party'] = PARTY_MAP[v]
        elif k in KEYS:
          kwargs[KEYS[k]] = v
        elif k == 'Email':
          email = v
        elif k in LINKS_KEYS:
          links.append({'url': v, 'note': k})
        elif k in IGNORE_KEYS:
          continue
        elif k in EXTRA_KEYS:
          extra[re.sub(r'[^a-z0-9_]', '', k.lower().replace(' ', '_'))] = v
        else:
          raise Exception(k)

      contacts = []
      for office in offices:
        for _, type in CONTACT_TYPE_KEYS.items():
          if office.get(type):
            contacts.append({'note': office['note'], 'type': type, 'value': office[type]})

      if 'name' in kwargs:
        p = Legislator(**kwargs)
        p.add_source(COUNCIL_PAGE)
        if email:
          p.add_contact('email', email, None)
        for link in links:
          p.add_link(**link)
        for contact in contacts:
          p.add_contact(**contact)
        for k, v in extra.items():
          p.add_extra(k, v)
        yield p
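
The dispatch above relies on several lookup tables (CONTACT_TYPE_KEYS, KEYS, LINKS_KEYS, IGNORE_KEYS, EXTRA_KEYS, PARTY_MAP) defined elsewhere in the scraper's module. A purely hypothetical shape for them, only to make the control flow concrete:

# Hypothetical values; the scraper's real constants are defined in its own module.
CONTACT_TYPE_KEYS = {'Phone': 'voice', 'Fax': 'fax', 'Address': 'address'}  # 'Office N: X' suffixes
KEYS = {'Name': 'name', 'District name': 'post_id'}                         # column -> Legislator kwarg
LINKS_KEYS = {'Website', 'Facebook', 'Twitter'}                             # columns stored as links
IGNORE_KEYS = {'Incumbent?'}                                                # columns to skip
EXTRA_KEYS = {'Occupation'}                                                 # columns kept as extras
PARTY_MAP = {'NDP': 'New Democratic Party'}                                 # party label normalization
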
Example #3
def get_files(folds_dir, split, fold):
    splits = ['train', 'valid', 'test']
    csv_dir = join(folds_dir, 'split_{}'.format(split), 'fold_{}'.format(fold))
    csv_files = [
        join(csv_dir, '{}_s_{}_f_{}.csv'.format(s, split, fold))
        for s in splits
    ]
    split_files = [csv_reader(csv) for csv in csv_files]
    return split_files
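
A usage sketch, assuming the folds directory is laid out the way the path templates above imply (e.g. folds/split_0/fold_1/train_s_0_f_1.csv and so on):

# Hypothetical call; returns one csv_reader result per split, in the order train, valid, test.
train_rows, valid_rows, test_rows = get_files('folds', split=0, fold=1)
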
Example #4
def load_src():
    count = 0
    for row in csv_reader('nounsword.csv'):
        count += 1
        if count < start_row:
            continue
        ret = bd_trans(row[0])
        print(ret)
        trans = ret.get('trans_result')
        write_row_csv('trans_word.csv', trans[0].values())
Example #5
def csv():
    """
    HW with CSV
    :return: template
    """
    av_height, av_weight = utils.csv_reader()
    return render_template('csv.html',
                           title='CSV',
                           av_height=av_height,
                           av_weight=av_weight)
Example #6
def get_spike_detection():
    """
    Return the total-count data for the requested word type and time frame (used for spike detection)
    """
    word_type = request.get_json()["word_type"]
    time_frame = request.get_json()["time_frame"]

    file_name = "./files/{}/{}_totalcounts_{}.csv".format(
        time_frame, word_type, time_frame)
    # print(file_name)

    data = utils.csv_reader(file_name)

    res = {"data": data}
    return jsonify(res), 201
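
A hedged example of how a client might call this view, assuming it is registered at a URL such as /spike_detection and that the JSON body carries the two keys read above (the route decorator and the valid values are not shown in the snippet):

import requests

# Hypothetical URL and payload values.
resp = requests.post(
    'http://localhost:5000/spike_detection',
    json={'word_type': 'hashtags', 'time_frame': 'daily'},
)
print(resp.status_code)      # 201 on success
print(resp.json()['data'])   # contents of the totalcounts CSV
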
Example #7
  def get_people(self):
    for row in csv_reader(COUNCIL_PAGE, header=True):
      p = Legislator(
        name='%(First Name)s %(Last Name)s' % row,
        post_id='Vancouver',
        role=row['Elected Office'],
        gender=row['Gender'],
        image=row['Photo URL'],
      )
      p.add_contact('email', row['Email'], None)
      p.add_contact('voice', row['Phone'], 'legislature')
      p.add_contact('fax', row['Fax'], 'legislature')
      p.add_contact('address', '%(Address line 1)s\n%(Locality)s %(Province)s  %(Postal Code)s' % row, 'legislature')
      p.add_source(COUNCIL_PAGE)
      p.add_source(row['URL'])
      yield p
Example #8
    def get_people(self):
        yield chair_info(CHAIR_URL)
        for row in csv_reader(
                COUNCIL_PAGE,
                header=True,
                headers={
                    'Cookie': 'incap_ses_168_68279=7jCHCh608QQSFVti3dtUAviu/1IAAAAAIRf6OsZL0NttnlzANkVb6w=='
                }):

            p = Legislator(
                name='%(FirstName0)s %(LastName0)s' % row,
                post_id='%(MUNIC)s Ward %(WARDNUM)s' % row,
                role='Councillor',
            )
            p.add_contact('email', row['email0'], None)
            p.add_contact('voice', row['Phone0'], 'legislature')
            p.add_extra(
                'boundary_url', '/boundaries/%s-wards/ward-%s/' %
                (row['MUNIC'].lower(), row['WARDNUM']))
            p.add_source(COUNCIL_PAGE)
            yield p

            if row['FirstName1'].strip():
                p = Legislator(
                    name='%s %s' % (row['FirstName1'], row['LastName1']),
                    post_id='%(MUNIC)s Ward %(WARDNUM)s' % row,
                    role='Councillor',
                )
                p.add_contact('email', row['email1'], None)
                p.add_contact('voice', row['Phone1'], 'legislature')
                p.add_extra(
                    'boundary_url', '/boundaries/%s-wards/ward-%s/' %
                    (row['MUNIC'].lower(), row['WARDNUM']))
                p.add_source(COUNCIL_PAGE)
                yield p
Example #9
def main(args):
    '''Main function for imputation with GINN.

    Args:
        - from_id: start index into the file list
        - to_id: end index into the file list
        - fold_size: number of folds; fold indices start at 1

    Returns:
        - writes the imputed data to disk
    '''

    # Input parameters
    from_id = args.from_id
    to_id = args.to_id
    fold_size = args.fold_size

    # Initial parameters
    missingness_flag = [0, 10, 20, 30, 40, 50]  # t% missing data
    seed = 42

    # Main program
    for i_file in range(from_id, to_id):
        file_name = file_list[i_file]
        print(datetime.datetime.now(), "File {}: {}".format(i_file, file_name))
        for i in tqdm(range(1, fold_size)):
            for missingness in missingness_flag:
                (D_miss_train, D_miss_test) = csv_reader(data_K_Fold, file_name, i, method='data_missing',
                                                         missingness=missingness)
                x_train = D_miss_train[:, :(D_miss_train.shape[1] - 1)]
                y_train = D_miss_train[:, -1]
                x_test = D_miss_test[:, :(D_miss_test.shape[1] - 1)]
                y_test = D_miss_test[:, -1]

                missing_train, missing_train_mask = mask_generation(x_train)
                missing_test, missing_test_mask = mask_generation(x_test)

                cx_train = np.c_[missing_train, y_train]
                cx_test = np.c_[missing_test, y_test]

                mask_train = np.c_[missing_train_mask, np.ones(y_train.shape)]
                mask_test = np.c_[missing_test_mask, np.ones(y_test.shape)]

                # Here we preprocess the data by applying a one-hot encoding to the categorical
                # variables. We get back the encoded dataset, masks that indicate which features
                # are missing and whether they are categorical or numerical, plus the new column
                # indices for the one-hot-encoded categorical variables.
                numerical_columns = dictionary_datasets['{}'.format(file_name)]['numerical']
                categorical_columns = dictionary_datasets['{}'.format(file_name)]['categorical']
                [oh_data, oh_mask, oh_numerical_mask, oh_categorical_mask, oh_categorical_columns, classes_dictionary] = data2onehot(
                    np.r_[cx_train, cx_test], np.r_[mask_train, mask_test], numerical_columns, categorical_columns)

                # We scale the features with a min max scaler that will preserve the one-hot encoding
                oh_data_train = oh_data[:x_train.shape[0], :]
                oh_data_test = oh_data[x_train.shape[0]:, :]

                oh_mask_train = oh_mask[:x_train.shape[0], :]
                oh_num_mask_train = oh_numerical_mask[:x_train.shape[0], :]
                oh_cat_mask_train = oh_categorical_mask[:x_train.shape[0], :]

                oh_mask_test = oh_mask[x_train.shape[0]:, :]
                oh_num_mask_test = oh_numerical_mask[x_train.shape[0]:, :]
                oh_cat_mask_test = oh_categorical_mask[x_train.shape[0]:, :]

                # Scaler
                scaler_train = preprocessing.MinMaxScaler()
                oh_data_train = scaler_train.fit_transform(oh_data_train)

                scaler_test = preprocessing.MinMaxScaler()
                oh_data_test = scaler_test.fit_transform(oh_data_test)
                # Now we are ready to impute the missing values on the training set!
                imputer_train = GINN(
                    oh_data_train,
                    oh_mask_train,
                    oh_num_mask_train,
                    oh_cat_mask_train,
                    oh_categorical_columns,
                    numerical_columns,
                    categorical_columns
                )
                # Transform
                imputer_train.fit(epochs=1)
                imputed_train = scaler_train.inverse_transform(imputer_train.transform())

                # Impute test
                imputer_train.add_data(
                    oh_data_test,
                    oh_mask_test,
                    oh_num_mask_test,
                    oh_cat_mask_test
                    )
                
                imputed_test = imputer_train.transform()
                imputed_test = scaler_test.inverse_transform(imputed_test[x_train.shape[0]:])
                
                # print(imputed_train[0])
                # Reconstruct the original column layout from the one-hot encoding
                if categorical_columns:
                    # Rebuild train
                    D_inverse_tr = inverse_onehot(cx_train.shape, imputed_train, oh_categorical_columns, classes_dictionary)
                    imputed_train = order_by_address(D_inverse_tr, num_cols=numerical_columns, cat_cols=categorical_columns)
                    # Rebuild test
                    D_inverse_te = inverse_onehot(cx_test.shape, imputed_test, oh_categorical_columns, classes_dictionary)
                    imputed_test = order_by_address(D_inverse_te, num_cols=numerical_columns, cat_cols=categorical_columns)
                # Check the approximation of each element
                imputed_train_checked = check_approximation(imputed_train, cx_train)
                imputed_test_checked = check_approximation(imputed_test, cx_test)
                # Write result
                imputed_path = os.path.join(imputed_dataset, file_name)
                write_file(imputed_train_checked, imputed_test_checked, imputed_path, 'GINN', missingness, i)
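
mask_generation is not shown in this example. A minimal sketch of what it presumably does, assuming missing entries are encoded as NaN: zero-fill the missing cells and return a binary mask marking the observed ones (1 = observed, 0 = missing), matching the (data, mask) order used above.

import numpy as np


def mask_generation(x):
    # Hypothetical sketch, not the project's actual helper.
    mask = (~np.isnan(x)).astype(float)      # 1 where a value is present, 0 where it is missing
    filled = np.where(np.isnan(x), 0.0, x)   # placeholder-fill the missing cells
    return filled, mask
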
Example #10
    def get_people(self):
        reader = csv_reader(COUNCIL_PAGE, header=True)
        for row in reader:
            kwargs = {'role': 'candidate'}
            email = None
            links = []
            extra = {}
            offices = []

            for k, v in row.items():
                v = v.strip()
                if not v:
                    continue

                k = k.strip()
                match = re.search(r'\AOffice (\d): ', k)
                if match:
                    index = int(match.group(1))
                    while index > len(offices):
                        offices.append({})
                    if k[10:] == 'Type':
                        offices[index - 1]['note'] = v
                    elif k[10:] in CONTACT_TYPE_KEYS:
                        offices[index - 1][CONTACT_TYPE_KEYS[k[10:]]] = v
                    else:
                        raise Exception(k)
                elif k == 'Party Name':
                    kwargs['party'] = PARTY_MAP[v]
                elif k in KEYS:
                    kwargs[KEYS[k]] = v
                elif k == 'Email':
                    email = v
                elif k in LINKS_KEYS:
                    links.append({'url': v, 'note': k})
                elif k in IGNORE_KEYS:
                    continue
                elif k in EXTRA_KEYS:
                    extra[re.sub(r'[^a-z0-9_]', '',
                                 k.lower().replace(' ', '_'))] = v
                else:
                    raise Exception(k)

            contacts = []
            for office in offices:
                for _, type in CONTACT_TYPE_KEYS.items():
                    if office.get(type):
                        contacts.append({
                            'note': office['note'],
                            'type': type,
                            'value': office[type]
                        })

            if 'name' in kwargs:
                p = Legislator(**kwargs)
                p.add_source(COUNCIL_PAGE)
                if email:
                    p.add_contact('email', email, None)
                for link in links:
                    p.add_link(**link)
                for contact in contacts:
                    p.add_contact(**contact)
                for k, v in extra.items():
                    p.add_extra(k, v)
                yield p
Example #11
        levels = MaxNLocator(nbins=15).bin_boundaries(z.min(), z.max())
        cmap = plt.get_cmap('PiYG')

        plt.contourf(x[:-1, :-1] + dx / 2.,
                     y[:-1, :-1] + dy / 2.,
                     z,
                     levels=levels,
                     cmap=cmap)
        plt.colorbar()
        plt.title('Density estimation by SOINN')
        plt.show()


if __name__ == '__main__':
    from utils import csv_reader
    from numpy import array  # array() is used below to build the test inputs
    r = csv_reader('reg_intro.csv')
    X, y = r.separate_label()
    the_reg = ISOINNregressor(smooth=-0.4, K=15)
    the_reg.fit(X, y)
    #    the_reg.draw_density()
    test_x = []
    draw_x = []
    for i in range(50):
        test_x.append(array([i / 50.0]))
        draw_x.append(i / 50.0)
    test_y = the_reg.predict(test_x)
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(draw_x, test_y, 'k-')
    plt.axis('off')
Example #12
def main(args):
    '''Main function for data preparation.

    Args:
        - from_id: start index into the file list
        - to_id: end index into the file list
        - review_missing_flag: if True, generate the missing-data files
        - review_imputed_flag: if True, impute the missing values

    Returns:
        - writes the missing-data files
        - writes the imputed-value files
    '''
    # Flag
    review_missing_flag = args.review_missing_flag
    review_imputed_flag = args.review_imputed_flag

    # Parameters
    from_id = args.from_id
    to_id = args.to_id
    n_iterations = args.n_iterations
    fold_size = 2 * args.n_iterations + 1  # fold indices start at 1
    random.seed(0)
    missingness_flag = [0, 10, 20, 30, 40, 50]  # t% missing data  
    binary_flag = [1, 0, 0, 0, 1, 1]  # 1 activate imputation algorithm
    imputation_flag = [i for i, impf in enumerate(binary_flag) if impf == 1]

    # Load data and introduce missingness
    for i_file in range(from_id, to_id):
        file_name = file_list[i_file]
        print(datetime.datetime.now(), "File {}: {}".format(i_file, file_name))
        # Data Processing
        if review_missing_flag:
            # Data loader
            D_train = np.loadtxt(data_folder + '/train1/' + file_name + '_train1.dat', delimiter=',')
            D_val = np.loadtxt(data_folder + '/val/' + file_name + '_val.dat', delimiter=',')
            D_test = np.loadtxt(data_folder + '/test/' + file_name + '_test.dat', delimiter=',')

            X_full = np.concatenate((D_train, D_val, D_test), axis=0)

            # K-Fold Cross Validation approach first time
            kf_1 = KFold(n_splits=n_iterations, shuffle=True)
            kf_1.split(X_full)
            # K-Fold Cross Validation approach second time
            kf_2 = KFold(n_splits=n_iterations, shuffle=True)
            kf_2.split(X_full)
            # Save file csv train(i)-test(i) i=<1, iterations>
            K_Fold_cross_validation(kf_1, X_full, data_K_Fold, file_name, 0)
            # Save file csv train(i)-test(i) i=<iterations, 2xiterations>
            K_Fold_cross_validation(kf_2, X_full, data_K_Fold, file_name, n_iterations)

            # Loading data K-Fold 
            for i in tqdm(range(1, fold_size)):
                (D_train, D_test) = csv_reader(data_K_Fold, file_name, i, method='original_data', missingness=None)
                for missingness in missingness_flag:
                    D_train_missing = missing_data_generation(D_train, missingness)
                    D_test_missing = missing_data_generation(D_test, missingness)
                    write_file(D_train_missing, D_test_missing, data_K_Fold, file_name, missingness, i)

        # Loading data processed and imputed dataset
        if review_imputed_flag:
            for i in tqdm(range(1, fold_size)):
                for missingness in missingness_flag:
                    (D_missing_train, D_missing_test) = csv_reader(data_K_Fold, file_name, i, method='data_missing',
                                                                   missingness=missingness)
                    for imp_flag in imputation_flag:
                        imputed_train, imputed_test, imp_name = imputation_method(D_missing_train, D_missing_test,
                                                                                  imp_flag, missingness)
                        imputation_path = os.path.join(file_name, imp_name)
                        write_file(imputed_train, imputed_test, imputed_dataset, imputation_path, missingness, i)
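
missing_data_generation is also not shown. A minimal MCAR-style sketch of what it presumably does, assuming the last column is the label and missing cells are marked with NaN; the helper's real behaviour may differ:

import numpy as np


def missing_data_generation(data, missingness, seed=0):
    # Hypothetical sketch: knock out roughly `missingness` percent of the feature cells.
    rng = np.random.default_rng(seed)
    corrupted = data.astype(float).copy()
    drop = rng.random((corrupted.shape[0], corrupted.shape[1] - 1)) < missingness / 100.0
    corrupted[:, :-1][drop] = np.nan   # leave the label column intact
    return corrupted
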
Example #13
def main(args):
    """ Main function for classification with imputed dataset
    
    Args:
        - from_id: start index to file list
        - to_id: end index to file list
        - fold_size: fold_size start from index 1 

    Returns:
        -     
    """
    # Input parameters
    from_id = args.from_id
    to_id = args.to_id
    fold_size = args.fold_size

    # Initial parameters
    binary_classifiers = [1, 1, 1, 1]  # 1: Activate or 0: Deactivate
    classfication_flag = [
        i for i, clsf in enumerate(binary_classifiers) if clsf == 1
    ]
    missingness_flag = [0, 10, 20, 30, 40, 50]  # t% missing data

    # Loading data
    for i_file in range(from_id, to_id):
        file_name = file_list[i_file]
        print(datetime.datetime.now(), "File {}: {}".format(i_file, file_name))
        file_data_path = os.path.join(imputed_dataset, file_name)
        result_data_path = os.path.join(result_path, file_name)
        for name_imputation in os.listdir(file_data_path):
            for missing in missingness_flag:
                for clf_flag in classfication_flag:
                    dict_eval = {
                        'accuracy': [],
                        'p_macro': [],
                        'r_macro': [],
                        'f1_macro': [],
                        'p_micro': [],
                        'r_micro': [],
                        'f1_micro': []
                    }
                    for i in range(1, fold_size):
                        D_train, D_test = csv_reader(file_data_path,
                                                     name_imputation,
                                                     i,
                                                     method='data_missing',
                                                     missingness=missing)

                        features_D_train = D_train[:, :-1]
                        labels_D_train = D_train[:, -1].astype(np.int32)
                        features_D_test = D_test[:, :-1]
                        labels_D_test = D_test[:, -1].astype(np.int32)

                        classes = np.unique(labels_D_test)
                        n_classes = len(classes)

                        labels_predicted, name_classification_algo = model_prediction(
                            features_D_train, features_D_test, labels_D_train,
                            clf_flag, n_classes)
                        accuracy, p_macro, r_macro, f1_macro, p_micro, r_micro, f1_micro = evaluation_report(
                            labels_predicted, labels_D_test)
                        dict_eval['accuracy'].append(accuracy)
                        dict_eval['p_macro'].append(p_macro)
                        dict_eval['r_macro'].append(r_macro)
                        dict_eval['f1_macro'].append(f1_macro)
                        dict_eval['p_micro'].append(p_micro)
                        dict_eval['r_micro'].append(r_micro)
                        dict_eval['f1_micro'].append(f1_micro)

                    write_report(dict_eval, result_data_path, name_imputation,
                                 missing, name_classification_algo)
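
evaluation_report is not shown either. A plausible sketch with scikit-learn, matching the seven values unpacked above (accuracy plus macro- and micro-averaged precision, recall and F1); treat it as an assumption rather than the project's actual implementation:

from sklearn.metrics import accuracy_score, precision_recall_fscore_support


def evaluation_report(labels_predicted, labels_true):
    # Hypothetical sketch of the metrics helper used above.
    accuracy = accuracy_score(labels_true, labels_predicted)
    p_macro, r_macro, f1_macro, _ = precision_recall_fscore_support(
        labels_true, labels_predicted, average='macro', zero_division=0)
    p_micro, r_micro, f1_micro, _ = precision_recall_fscore_support(
        labels_true, labels_predicted, average='micro', zero_division=0)
    return accuracy, p_macro, r_macro, f1_macro, p_micro, r_micro, f1_micro
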