Example No. 1
def main():
    start_time = time.time()
    template_location = os.path.join(current_dir(), 'output-template')
    css_file = os.path.join(template_location, 'static', 'all.css')
    template_file = os.path.join(template_location, 'example.html')

    args = sys.argv[1:]

    if len(args) < 3:
        raise Exception("This script should be run using. ./run <destination.xml> <taxonomy.xml> /path/to/output_location")

    # create the output folder and move the static files into it
    output_location = args[2]
    static_location = os.path.join(output_location, 'static')
    make_folder(output_location)
    make_folder(static_location)

    shutil.copy2(css_file, static_location)
    print "copied css from %s to %s" % (css_file, static_location)

    try:
        taxonomies = process_taxonomies(args[1])
    except Exception:
        raise Exception("Taxonomies could not be processed :(")

    try:
        process_destinations(args[0], taxonomies, template_file, output_location)
    except Exception:
        raise Exception("Destinations could not be processed :(")

    print "\nexecution took: %0.2f seconds" % (time.time() - start_time)

    print "\nThank you for considering me for this position."
    print "To see more amazing code like this be sure to send me through to the next round."
Example No. 2
        feature = self.avg_pool(out)
        feature = feature.view(feature.shape[0], -1)
        # logits = self.classifier(feature)

        return feature


meanstd = {
    'cifar10': [(0.49139968, 0.48215841, 0.44653091),
                (0.24703223, 0.24348513, 0.26158784)],
    'svhn': [(0.4376821, 0.4437697, 0.47280442),
             (0.19803012, 0.20101562, 0.19703614)]
}
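The tuples above are per-channel (mean, std) statistics. transforms.Normalize applies (x - mean) / std channel-wise, which is what the Compose pipeline below does after ToTensor. A small self-contained sketch of that step (the random tensor is just a stand-in for a real image):

import torch
from torchvision import transforms

cifar_mean, cifar_std = meanstd['cifar10']
normalize = transforms.Normalize(cifar_mean, cifar_std)
x = torch.rand(3, 32, 32)   # fake CIFAR-10 sized image in [0, 1]
x_norm = normalize(x)       # (x - mean) / std, per channel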

if __name__ == "__main__":
    make_folder(args.save_path)

    with open(args.index_path, 'r') as f:
        label_indices = [int(line.rstrip('\n')) for line in f]

    data_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(*meanstd[args.dataset])])

    if args.dataset == "cifar10":
        train_dataset = dsets.CIFAR10(root=args.data_path,
                                      train=True,
                                      download=True,
                                      transform=data_transform)
    elif args.dataset == "svhn":
        train_dataset = dsets.SVHN(root=args.data_path,
Example No. 3
def process_mtbi_data_files(_years):
    start_year = _years[0]
    end_year = _years[-1]
    print("\n***** PROCESSING MTBI DATA FROM {} TO {} *****".format(
        start_year, end_year))
    year_folders = []
    for year in _years:
        year_folders.append(constants.INTERVIEW_FILES[year])

    fmli_pipe = utils.concat_data_for_type('fmli', year_folders,
                                           extract_files_path)
    mtbi_pipe = utils.concat_data_for_type('mtbi', year_folders,
                                           extract_files_path)

    # age_pipe = fmli_pipe['AGE_REF'].groupby(fmli_pipe['NEWID']).max()
    age_pipe = fmli_pipe.groupby('NEWID', as_index=False)['AGE_REF'].max()

    # Retain NEWID and FINLWT21 column
    finl_wt_pipe = fmli_pipe[['NEWID', 'FINLWT21']]

    # Sum(FINLWT21) grouped by AGE_REF in 'fmli' files
    sum_finl_wt_pipe = fmli_pipe.groupby(['AGE_REF'],
                                         as_index=False)['FINLWT21'].sum()
    sum_finl_wt_pipe.rename(columns={'FINLWT21': 'SUM_FINLWT21'}, inplace=True)

    # Sum(COST) grouped by NEWID, UCC in 'mtbi' files
    monthly_age_spend_pipe = mtbi_pipe.groupby(['NEWID', 'UCC'],
                                               as_index=False)['COST'].sum()

    # Join AGE_REF by NEWID from 'fmli' to 'mtbi' files
    monthly_age_spend_pipe = pd.merge(monthly_age_spend_pipe,
                                      age_pipe,
                                      on='NEWID',
                                      how='left')
    monthly_age_spend_pipe.drop_duplicates(inplace=True)

    # Join FINLWT21 by NEWID from 'fmli' to 'mtbi' files
    monthly_age_spend_finl_wt_pipe = pd.merge(monthly_age_spend_pipe,
                                              finl_wt_pipe,
                                              on='NEWID',
                                              how='left')
    monthly_age_spend_finl_wt_pipe.drop_duplicates(inplace=True)

    # Sum(WT_COST) grouped by AGE_REF, UCC
    monthly_age_spend_finl_wt_pipe['WT_COST'] = monthly_age_spend_finl_wt_pipe[
        'FINLWT21'] * monthly_age_spend_finl_wt_pipe['COST']
    monthly_age_ucc_spend_pipe = monthly_age_spend_finl_wt_pipe.groupby(
        ['AGE_REF', 'UCC'], as_index=False)['WT_COST'].sum()

    # Join the denominator Sum(FINLWT21) grouped by AGE_REF from 'fmli' to spend pipe
    monthly_age_ucc_spend_pipe = pd.merge(monthly_age_ucc_spend_pipe,
                                          sum_finl_wt_pipe,
                                          on='AGE_REF',
                                          how='left')
    monthly_age_ucc_spend_pipe.drop_duplicates(inplace=True)

    # Calculate the average spend by dividing Sum(FINLWT21 * COST) by the denominator Sum(FINLWT21) grouped by AGE_REF
    monthly_age_ucc_spend_pipe['AVG_SPEND'] = (
        ((monthly_age_ucc_spend_pipe['WT_COST'] /
          monthly_age_ucc_spend_pipe['SUM_FINLWT21']) *
         YEAR_BUCKET_SIZE_MULTIPLIER) / config.YEAR_BUCKET_SIZE).round(2)

    monthly_age_ucc_spend_pipe.drop(columns='SUM_FINLWT21', inplace=True)

    # Keep rows within the configured AGE_REF thresholds
    monthly_age_ucc_spend_pipe = monthly_age_ucc_spend_pipe[
        (monthly_age_ucc_spend_pipe['AGE_REF'] >= config.AGE_THRESHOLDS['min'])
        & (monthly_age_ucc_spend_pipe['AGE_REF'] <=
           config.AGE_THRESHOLDS['max'])]
    monthly_age_ucc_spend_pipe['AGE_REF'] = monthly_age_ucc_spend_pipe[
        'AGE_REF'].astype(int)
    print(monthly_age_ucc_spend_pipe)

    # Export processed data
    utils.make_folder(config.EXPORT_FILES_PATH)
    export_file = os.path.join(
        config.EXPORT_FILES_PATH,
        "mtbi_avg_spend_intrvw_{}_to_{}.csv".format(start_year, end_year))
    monthly_age_ucc_spend_pipe.to_csv(export_file, index=False)
    print("Exporting data to {}".format(export_file))

    # Reshape and export processed data
    reshaped_data = monthly_age_ucc_spend_pipe.pivot(index='UCC',
                                                     columns='AGE_REF',
                                                     values='AVG_SPEND')
    reshaped_data = pd.merge(reshaped_data,
                             utils.ucc_pipe,
                             on='UCC',
                             how='left')
    reshaped_data.drop_duplicates(inplace=True)
    reshaped_data.rename(columns={'UCC_DESCRIPTION': 'UCC_DESCRIPTION_COPY'},
                         inplace=True)
    reshaped_data.insert(1, 'UCC_DESCRIPTION',
                         reshaped_data['UCC_DESCRIPTION_COPY'])
    reshaped_data.drop(columns='UCC_DESCRIPTION_COPY', inplace=True)
    reshaped_file = os.path.join(
        config.EXPORT_FILES_PATH,
        "mtbi_reshaped_avg_spend_intrvw_{}_to_{}.csv".format(
            start_year, end_year))
    reshaped_data.to_csv(reshaped_file, index=False)
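The core computation above is a weighted mean: within each AGE_REF group, Sum(FINLWT21 * COST) is divided by Sum(FINLWT21), before the year-bucket scaling is applied. A minimal sketch of that pattern on toy data, independent of the actual CE survey files:

import pandas as pd

toy = pd.DataFrame({
    'AGE_REF': [25, 25, 30],
    'FINLWT21': [10.0, 30.0, 20.0],   # survey weights
    'COST': [100.0, 200.0, 50.0],
})
toy['WT_COST'] = toy['FINLWT21'] * toy['COST']
num = toy.groupby('AGE_REF', as_index=False)['WT_COST'].sum()
den = toy.groupby('AGE_REF', as_index=False)['FINLWT21'].sum()
avg = num.merge(den, on='AGE_REF')
avg['AVG_SPEND'] = (avg['WT_COST'] / avg['FINLWT21']).round(2)
print(avg[['AGE_REF', 'AVG_SPEND']])
# AGE_REF 25 -> (10*100 + 30*200) / (10 + 30) = 175.0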
Example No. 4
def process_fmli_data_files(_years):
    start_year = _years[0]
    end_year = _years[-1]
    print("\n***** PROCESSING FMLI DATA FROM {} TO {} *****".format(
        start_year, end_year))
    year_folders = []
    for year in _years:
        year_folders.append(constants.INTERVIEW_FILES[year])

    fmli_pipe = utils.concat_data_for_type('fmli', year_folders,
                                           extract_files_path)

    expn_vars_dict = get_expense_vars_dict(fmli_pipe)

    # Sum(FINLWT21) grouped by AGE_REF in 'fmli' files
    finl_wt_pipe = fmli_pipe.groupby(['AGE_REF'],
                                     as_index=False)['FINLWT21'].sum()
    finl_wt_pipe.rename(columns={'FINLWT21': 'SUM_FINLWT21'}, inplace=True)
    # print(finl_wt_pipe)

    # Get the weighted spend
    for key, val in expn_vars_dict.items():
        fmli_pipe['WT_' + key] = fmli_pipe['FINLWT21'] * (fmli_pipe[val[0]] +
                                                          fmli_pipe[val[1]])

    # Sum(expn_vars) grouped by AGE_REF
    wt_expn_vars = [x for x in list(fmli_pipe) if x.startswith('WT_')]
    spend_pipe = fmli_pipe.groupby(['AGE_REF'],
                                   as_index=False)[wt_expn_vars].sum().round(2)
    spend_pipe = pd.merge(spend_pipe, finl_wt_pipe, on='AGE_REF', how='left')
    spend_pipe.drop_duplicates(inplace=True)

    # Calculate the average spend by dividing weighted spend by the denominator Sum(FINLWT21) grouped by AGE_REF
    for key, val in expn_vars_dict.items():
        spend_pipe[key] = (
            ((spend_pipe['WT_' + key] / spend_pipe['SUM_FINLWT21']) *
             YEAR_BUCKET_SIZE_MULTIPLIER) / config.YEAR_BUCKET_SIZE).round(2)
        spend_pipe.drop(columns='WT_' + key, inplace=True)

    spend_pipe.drop(columns='SUM_FINLWT21', inplace=True)

    # Keep rows within the configured AGE_REF thresholds
    spend_pipe = spend_pipe[
        (spend_pipe['AGE_REF'] >= config.AGE_THRESHOLDS['min'])
        & (spend_pipe['AGE_REF'] <= config.AGE_THRESHOLDS['max'])]
    spend_pipe['AGE_REF'] = spend_pipe['AGE_REF'].astype(int)
    print(spend_pipe)

    # Export processed data
    utils.make_folder(config.EXPORT_FILES_PATH)
    export_file = os.path.join(
        config.EXPORT_FILES_PATH,
        "fmli_avg_spend_intrvw_{}_to_{}.csv".format(start_year, end_year))
    spend_pipe.to_csv(export_file, index=False)
    print("Exporting data to {}".format(export_file))

    # Reshape and export processed data
    reshaped_data = spend_pipe.set_index('AGE_REF')
    reshaped_data = reshaped_data.T
    reshaped_data.reset_index(drop=False, inplace=True)
    reshaped_data.rename(columns={'index': 'CAT_CODE'}, inplace=True)
    reshaped_data = pd.merge(reshaped_data,
                             utils.fmli_category_pipe,
                             on='CAT_CODE',
                             how='left')
    reshaped_data.drop_duplicates(inplace=True)
    reshaped_data.rename(columns={'CAT_DESCRIPTION': 'CAT_DESCRIPTION_COPY'},
                         inplace=True)
    reshaped_data.insert(1, 'CAT_DESCRIPTION',
                         reshaped_data['CAT_DESCRIPTION_COPY'])
    reshaped_data.drop(columns='CAT_DESCRIPTION_COPY', inplace=True)
    reshaped_file = os.path.join(
        config.EXPORT_FILES_PATH,
        "fmli_reshaped_avg_spend_intrvw_{}_to_{}.csv".format(
            start_year, end_year))
    reshaped_data.to_csv(reshaped_file, index=False)
Example No. 5
def main(config):
    # For fast training
    cudnn.benchmark = True

    ##### Dataloader #####
    config.video_path = os.path.join(config.root_path, config.video_path)
    config.annotation_path = os.path.join(config.root_path,
                                          config.annotation_path)
    config.mean = get_mean(config.norm_value, dataset=config.mean_dataset)

    if config.no_mean_norm and not config.std_norm:
        norm_method = Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    elif not config.std_norm:
        norm_method = Normalize(config.mean, [1, 1, 1])

    config.scales = [config.initial_scale]
    for i in range(1, config.n_scales):
        config.scales.append(config.scales[-1] * config.scale_step)

    if config.train:
        assert config.train_crop in ['random', 'corner', 'center']
        if config.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(config.scales,
                                               config.sample_size)
        elif config.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(config.scales,
                                               config.sample_size)
        elif config.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(config.scales,
                                               config.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(config.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(config.n_frames)
        target_transform = ClassLabel()

        print("=" * 30, "\nLoading data...")
        training_data = get_training_set(config, spatial_transform,
                                         temporal_transform, target_transform)

        train_loader = torch.utils.data.DataLoader(
            training_data,
            batch_size=config.batch_size,
            shuffle=True,
            num_workers=config.num_workers,
            pin_memory=True)
    else:
        spatial_transform = Compose([
            Scale(config.sample_size),
            CenterCrop(config.sample_size),
            ToTensor(config.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(config.n_frames)
        target_transform = ClassLabel()
        validation_data = get_validation_set(config, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(
            validation_data,
            batch_size=config.batch_size,
            shuffle=False,
            num_workers=config.num_workers,
            pin_memory=True)

    ##### End dataloader #####

    # Use Big-GAN dataset to test only
    # The random data is used in the trainer
    # Need to pre-process data and use the dataloader (above)

    # config.n_class = len(glob.glob(os.path.join(config.root_path, config.video_path)))

    ## Data loader
    print('number of classes:', config.n_class)
    # # Data loader
    # data_loader = Data_Loader(config.train, config.dataset, config.image_path, config.imsize,
    #                          config.batch_size, shuf=config.train)

    # Create directories if not exist
    make_folder(config.model_save_path, config.version)
    # make_folder(config.sample_path, config.version)
    make_folder(config.log_path, config.version)

    if config.train:
        if config.model == 'dvd-gan':
            trainer = Trainer(train_loader, config)
        else:
            trainer = None

        trainer.train()
    else:
        tester = Tester(val_loader, config)
        tester.test()
Example No. 6
def main(config):
    cudnn.benchmark = True

    print("Loading data...")
    data_loader = Data_Loader(config.train,
                              config.dataset,
                              config.imsize,
                              config.batch_size,
                              config.image_path,
                              shuf=config.train)
    print('Done.')

    # Create directories if these do not exist
    for _subdir in ['gen', 'gen_avg', 'gen_ema', 'gen_ema_slow']:
        make_folder(config.model_save_path, _subdir)
        make_folder(config.sample_path, _subdir)
    make_folder(config.log_path)
    make_folder(config.best_path)
    if config.backup_freq > 0:
        make_folder(config.bup_path)
    if config.dataset == 'imagenet' and config.fid_freq > 0:
        make_folder(config.metrics_path)

    # Train
    trainer = Trainer(data_loader.loader(), config)
    trainer.train()
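Examples 5 and 6 above (and Example No. 15 below) call make_folder with two arguments, a base path plus a version or subdirectory name, and Example No. 15 also uses its return value as the created path. A plausible sketch of that variant, again an assumption rather than any project's actual helper:

import os

def make_folder(path, version=None):
    # Hypothetical sketch: join the optional subdirectory onto the base path,
    # create it if needed, and return the resulting path.
    target = os.path.join(path, version) if version else path
    os.makedirs(target, exist_ok=True)
    return target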
Example No. 7
# Dependencies
import pandas as pd

from tqdm import tqdm

from constants import LABELS_FILE, IMAGES_PATH, SAVE_PATH, IMAGE_SIZE, SAVE_CSV
from utils import make_folder, process_image, get_all_files

# Making the folder to save images
make_folder(SAVE_PATH)

# Read the Labels
label_csv = pd.read_csv(LABELS_FILE)

labels = label_csv["count"].values

del label_csv

# Array to store all the data
dataset = []

# Read and process all images
images = get_all_files(IMAGES_PATH)

for index, image in tqdm(enumerate(images)):
    try:
        saved_path = process_image(IMAGES_PATH, image, SAVE_PATH,
                                   (IMAGE_SIZE, IMAGE_SIZE))
        dataset.append([saved_path, labels[index]])
    except Exception:
        # Skip images that fail to process
        pass
Example No. 8
import options
import utils
from trainer import Validator

if __name__ == "__main__":

    print("=======================================================")
    print("Evaluate / generate 3D Point Cloud generation model.")
    print("=======================================================")

    cfg = options.get_arguments()
    cfg.batchSize = cfg.inputViewN
    # cfg.chunkSize = 50

    RESULTS_PATH = f"results/{cfg.model}_{cfg.experiment}"
    utils.make_folder(RESULTS_PATH)

    dataloaders = utils.make_data_fixed(cfg)
    test_dataset = dataloaders[1].dataset

    model = utils.build_structure_generator(cfg).to(cfg.device)

    validator = Validator(cfg, test_dataset) 

    hist = validator.eval(model)
    hist.to_csv(f"{RESULTS_PATH}.csv", index=False)
Example No. 9
def export_god_file(god_pipe, bucket_size, file_type):
    utils.make_folder(config.GOODNESS_OF_DATA_FOLDER_PATH)
    god_file_name = "{}_god_{}yrs.csv".format(file_type, bucket_size)
    god_file = os.path.join(config.GOODNESS_OF_DATA_FOLDER_PATH, god_file_name)
    god_pipe.to_csv(god_file, index=False)
    print("Exporting data to {}".format(god_file))
Example No. 10
def export_goodness_of_fit_files(file_type, gof_pipe, part_file_name):
    utils.make_folder(config.GOODNESS_OF_FIT_FOLDER_PATH)
    gof_file_name = "{}_gof_{}.csv".format(file_type, part_file_name)
    gof_file = os.path.join(config.GOODNESS_OF_FIT_FOLDER_PATH, gof_file_name)
    gof_pipe.to_csv(gof_file, index=False)
    print("Exporting data to {}".format(gof_file))
Example No. 11
    def _get_state_data(self):
        """
		This function downloads the data from autoredistrict.org's ftp. After some 
		minor cleaning, the data is saved as a geopandas DataFrame.

		NOTE: currently the shape files on autoredistrict's ftp are districts
		instead of precincts as before. Don't use wget. 

		INPUT:
		----------------------------------------------------------------------------
		self.state: string, postal ID for self.state and key to "self.states" dictionary
		wget: boolian (default=False), whether to download new shape files.

		OUTPUT:
		----------------------------------------------------------------------------
		None, but DataFrame is pickled in ../Data-Files/<self.state> alongside the shape
		files.
		"""
        # make folder if it doesn't already exist
        prefix = '../Data-Files/' + self.state
        utils.make_folder(prefix)

        # import shape files
        url = 'ftp://autoredistrict.org/pub/shapefiles2/' + self.state_name + '/2010/2012/vtd/tl*'
        if self.wget:
            call(['wget', '-P', prefix, url])

        # read shape files into geopandas
        geo_path = glob(prefix + '/tl*.shp')[0]
        geo_df = geo.GeoDataFrame.from_file(geo_path)
        geo_df.CD_2010 = geo_df.CD_2010.astype(int)

        # drops totals and other non-precinct observations
        geo_df = geo_df[geo_df.CD_2010 >= 0]

        # -------------------------------------------------------------------------
        # ADJUST ASPECT RATIO HERE:
        # -------------------------------------------------------------------------
        geo_df = geo_df.to_crs(epsg=4267)
        # geo_df.geometry = geo_df.geometry.scale(xfact=1/100000, yfact=1/100000, zfact=1.0, origin=(0, 0))

        # simplify geometries for faster image rendering
        # bigger number gives a smaller file size
        geo_df.geometry = geo_df.geometry.simplify(.01).buffer(.007)

        # add longitude and latitude
        lonlat = np.array([t.centroid.coords.xy for t in geo_df.geometry])
        geo_df['INTPTLON10'] = lonlat[:, 0]
        geo_df['INTPTLAT10'] = lonlat[:, 1]

        # -------------------------------------------------------------------------
        # make sure congressional districts are numbered starting at 0
        geo_df.CD_2010 -= geo_df.CD_2010.min()

        # correct a few curiosities
        if self.state in ['KY']:
            geo_df.drop([
                'POP_BLACK', 'POP_WHITE', 'POP_ASIAN', 'POP_HAWAII',
                'POP_HISPAN', 'POP_INDIAN', 'POP_MULTI', 'POP_OTHER',
                'POP_TOTAL'
            ],
                        axis=1,
                        inplace=True)

            geo_df.rename(index=str,
                          columns={
                              'VAP_BLACK': 'POP_BLACK',
                              'VAP_WHITE': 'POP_WHITE',
                              'VAP_ASIAN': 'POP_ASIAN',
                              'VAP_HAWAII': 'POP_HAWAII',
                              'VAP_HISPAN': 'POP_HISPAN',
                              'VAP_INDIAN': 'POP_INDIAN',
                              'VAP_MULTI': 'POP_MULTI',
                              'VAP_OTHER': 'POP_OTHER',
                              'VAP_TOT': 'POP_TOTAL'
                          },
                          inplace=True)

        # percent black in each precinct, account for precincts with zero population
        geo_df['BLACK_PCT'] = np.maximum(
            geo_df['POP_BLACK'] / geo_df['POP_TOTAL'], 0)
        geo_df.loc[~np.isfinite(geo_df['POP_TOTAL']), 'BLACK_PCT'] = 0
        geo_df.fillna({'BLACK_PCT': 0}, inplace=True)

        # political breakdown
        geo_df['DEM'] = geo_df[['PRES04_DEM', 'PRES08_DEM',
                                'PRES12_DEM']].mean(axis=1)
        geo_df['REP'] = geo_df[['PRES04_REP', 'PRES08_REP',
                                'PRES12_REP']].mean(axis=1)
        geo_df['REP_PCT'] = geo_df['REP'] / (geo_df['DEM'] + geo_df['REP'])
        geo_df['DEM_PCT'] = geo_df['DEM'] / (geo_df['DEM'] + geo_df['REP'])

        # unpack multipolygons
        self.pcnct_df = geo_df  #utils.unpack_multipolygons(geo_df)

        # pickle dataframe for future use
        pickle.dump(self.pcnct_df,
                    open(prefix + '/precinct_data.p', 'wb'),
                    protocol=2)
Example No. 12
def process_interview_data_files():
    fmli_pipe = concat_data_for_type("fmli")
    mtbi_pipe = concat_data_for_type("mtbi")

    # age_pipe = fmli_pipe['AGE_REF'].groupby(fmli_pipe['NEWID']).max()
    age_pipe = fmli_pipe.groupby('NEWID', as_index=False)['AGE_REF'].max()

    # Retain NEWID and FINLWT21 column
    finl_wt_pipe = fmli_pipe[['NEWID', 'FINLWT21']]
    print(finl_wt_pipe)

    # Sum(FINLWT21) grouped by AGE_REF in 'fmli' files
    sum_finl_wt_pipe = fmli_pipe.groupby(['AGE_REF'],
                                         as_index=False)['FINLWT21'].sum()
    sum_finl_wt_pipe.rename(columns={'FINLWT21': 'SUM_FINLWT21'}, inplace=True)

    # Sum(COST) grouped by NEWID, UCC in 'mtbi' files
    monthly_age_spend_pipe = mtbi_pipe.groupby(['NEWID', 'UCC'],
                                               as_index=False)['COST'].sum()

    # Join AGE_REF by NEWID from 'fmli' to 'mtbi' files
    monthly_age_spend_pipe = pd.merge(monthly_age_spend_pipe,
                                      age_pipe,
                                      on='NEWID',
                                      how='left')
    monthly_age_spend_pipe.drop_duplicates(inplace=True)

    # Join FINLWT21 by NEWID from 'fmli' to 'mtbi' files
    monthly_age_spend_finl_wt_pipe = pd.merge(monthly_age_spend_pipe,
                                              finl_wt_pipe,
                                              on='NEWID',
                                              how='left')
    monthly_age_spend_finl_wt_pipe.drop_duplicates(inplace=True)

    # Sum(WT_COST) grouped by AGE_REF, UCC
    monthly_age_spend_finl_wt_pipe['WT_COST'] = monthly_age_spend_finl_wt_pipe['FINLWT21'] * \
                                                monthly_age_spend_finl_wt_pipe['COST']
    monthly_age_ucc_spend_pipe = monthly_age_spend_finl_wt_pipe.groupby(
        ['AGE_REF', 'UCC'], as_index=False)['WT_COST'].sum()

    # Join the denominator Sum(FINLWT21) grouped by AGE_REF from 'fmli' to spend pipe
    monthly_age_ucc_spend_pipe = pd.merge(monthly_age_ucc_spend_pipe,
                                          sum_finl_wt_pipe,
                                          on='AGE_REF',
                                          how='left')
    monthly_age_ucc_spend_pipe.drop_duplicates(inplace=True)

    # Divide Sum(FINLWT21 * COST) by the denominator Sum(FINLWT21) grouped by AGE_REF
    monthly_age_ucc_spend_pipe['AVG_SPEND'] = (
        (monthly_age_ucc_spend_pipe['WT_COST'] /
         monthly_age_ucc_spend_pipe['SUM_FINLWT21']) * 20).round(2)
    monthly_age_ucc_spend_pipe = monthly_age_ucc_spend_pipe[
        (monthly_age_ucc_spend_pipe['AGE_REF'] >= 20)
        & (monthly_age_ucc_spend_pipe['AGE_REF'] <= 80)]
    monthly_age_ucc_spend_pipe['AGE_REF'] = monthly_age_ucc_spend_pipe[
        'AGE_REF'].astype(int)
    print(monthly_age_ucc_spend_pipe)

    # Export processed data
    utils.make_folder(config.EXPORT_FILES_PATH)
    export_file = os.path.join(config.EXPORT_FILES_PATH,
                               "test_export_file.csv")
    monthly_age_ucc_spend_pipe.to_csv(export_file, index=False)
    print("Exporting data to {}".format(export_file))

    # Reshape and export processed data
    reshaped_data = monthly_age_ucc_spend_pipe.pivot(index='UCC',
                                                     columns='AGE_REF',
                                                     values='AVG_SPEND')
    reshaped_data = pd.merge(reshaped_data,
                             data_dict_pipe,
                             on='UCC',
                             how='left')
    reshaped_data.drop_duplicates(inplace=True)
    reshaped_data.insert(1, 'UCC_DESCRIPTION', reshaped_data['UCC_DESC'])
    reshaped_data.drop(columns='UCC_DESC', inplace=True)
    print(reshaped_data)
    reshaped_file = os.path.join(config.EXPORT_FILES_PATH,
                                 "test_reshaped_file.csv")
    reshaped_data.to_csv(reshaped_file, index=False)
Example No. 13
def process_fmli_data_files():
    fmli_pipe = concat_data_for_type('fmli')
    expense_vars = [
        x for x in list(fmli_pipe) if x.endswith('PQ') or x.endswith('CQ')
    ]
    # Group the *PQ/*CQ columns under their common prefix
    expense_vars_dict = {}
    for expense_var in expense_vars:
        expense_vars_dict.setdefault(expense_var[:-2], []).append(expense_var)
    print(expense_vars_dict)

    # Sum(FINLWT21) grouped by AGE_REF in 'fmli' files
    finl_wt_pipe = fmli_pipe.groupby(['AGE_REF'],
                                     as_index=False)['FINLWT21'].sum()
    finl_wt_pipe.rename(columns={'FINLWT21': 'SUM_FINLWT21'}, inplace=True)
    # print(finl_wt_pipe)

    # Get the weighted spend
    for key, val in expense_vars_dict.items():
        if len(val) < 2:
            fmli_pipe['WT_' + key] = fmli_pipe['FINLWT21'] * fmli_pipe[val[0]]
        else:
            fmli_pipe['WT_' +
                      key] = fmli_pipe['FINLWT21'] * (fmli_pipe[val[0]] +
                                                      fmli_pipe[val[1]])

    # Sum(expense_vars) grouped by AGE_REF
    wt_expense_vars = [x for x in list(fmli_pipe) if x.startswith('WT_')]
    spend_pipe = fmli_pipe.groupby(
        ['AGE_REF'], as_index=False)[wt_expense_vars].sum().round(2)
    spend_pipe = pd.merge(spend_pipe, finl_wt_pipe, on='AGE_REF', how='left')
    spend_pipe.drop_duplicates(inplace=True)

    for key, val in expense_vars_dict.items():
        spend_pipe[key] = (
            (spend_pipe['WT_' + key] / spend_pipe['SUM_FINLWT21']) *
            20).round(2)
        spend_pipe.drop(columns='WT_' + key, inplace=True)

    spend_pipe.drop(columns='SUM_FINLWT21', inplace=True)
    spend_pipe = spend_pipe[(spend_pipe['AGE_REF'] >= 20)
                            & (spend_pipe['AGE_REF'] <= 80)]
    spend_pipe['AGE_REF'] = spend_pipe['AGE_REF'].astype(int)
    print(spend_pipe)

    # Export processed data
    utils.make_folder(config.EXPORT_FILES_PATH)
    export_file = os.path.join(config.EXPORT_FILES_PATH,
                               "test_fmli_export_file.csv")
    spend_pipe.to_csv(export_file, index=False)
    print("Exporting data to {}".format(export_file))

    # Reshape and export processed data
    reshaped_data = spend_pipe.set_index('AGE_REF')
    reshaped_data = reshaped_data.T
    reshaped_data.reset_index(drop=False, inplace=True)
    reshaped_data.rename(columns={'index': 'CAT_CODE'}, inplace=True)
    reshaped_data = pd.merge(reshaped_data,
                             utils.fmli_category_pipe,
                             on='CAT_CODE',
                             how='left')
    reshaped_data.drop_duplicates(inplace=True)
    reshaped_data.rename(columns={'CAT_DESCRIPTION': 'CAT_DESCRIPTION_COPY'},
                         inplace=True)
    reshaped_data.insert(1, 'CAT_DESCRIPTION',
                         reshaped_data['CAT_DESCRIPTION_COPY'])
    reshaped_data.drop(columns='CAT_DESCRIPTION_COPY', inplace=True)
    print(reshaped_data)
    reshaped_file = os.path.join(config.EXPORT_FILES_PATH,
                                 "test_fmli_reshaped_file.csv")
    reshaped_data.to_csv(reshaped_file, index=False)
Example No. 14
import options
import utils
from trainer import TrainerStage1

if __name__ == "__main__":

    print("=======================================================")
    print("Pretrain structure generator with fixed viewpoints")
    print("=======================================================")

    cfg = options.get_arguments()

    EXPERIMENT = f"{cfg.model}_{cfg.experiment}"
    MODEL_PATH = f"models/{EXPERIMENT}"
    LOG_PATH = f"logs/{EXPERIMENT}"

    utils.make_folder(MODEL_PATH)
    utils.make_folder(LOG_PATH)

    criterions = utils.define_losses()
    dataloaders = utils.make_data_fixed(cfg)

    model = utils.build_structure_generator(cfg).to(cfg.device)
    optimizer = utils.make_optimizer(cfg, model)
    scheduler = utils.make_lr_scheduler(cfg, optimizer)

    logger = utils.make_logger(LOG_PATH)
    writer = utils.make_summary_writer(EXPERIMENT)

    def on_after_epoch(model, df_hist, images, epoch):
        utils.save_best_model(MODEL_PATH, model, df_hist)
        utils.log_hist(logger, df_hist)
Example No. 15
def main(opt):

    # make folder
    base_path = 'result'
    os.makedirs(base_path, exist_ok=True)
    result_path = make_folder(base_path, opt.save_folder)

    # Dataset
    print(f'Preparing Dataset....{opt.dataset}')
    train_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        # transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    test_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    train_set, test_set = get_dataset(opt.dataset, train_transform,
                                      test_transform)
    if opt.testing:
        train_set = Subset(train_set, range(opt.train_batch_size))
        test_set = Subset(test_set, range(opt.test_batch_size))

    # Load Dataset
    train_loader = DataLoader(train_set,
                              batch_size=opt.train_batch_size,
                              shuffle=True)
    test_loader = DataLoader(test_set,
                             batch_size=opt.test_batch_size,
                             shuffle=False)

    # GPU
    device = 'cuda' if (torch.cuda.is_available() and opt.cuda) else 'cpu'
    print(f'Using {device}')

    # model
    from torchvision.models import vgg16_bn
    print(f'Preparing Model....{opt.model}')
    model = get_model(opt.model, opt.num_classes, pretrained=opt.pretrained)
    model.to(device)

    # resuming
    if opt.resume:
        print('Resuming from checkpoint')
        assert os.path.isdir(f'{opt.resume}')

        checkpoint = torch.load(f'{opt.resume}/{opt.model}_ckpt.pth')
        model.load_state_dict(checkpoint['model'])

        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
        train_result = checkpoint['train_result']
        test_result = checkpoint['test_result']

    else:
        start_epoch = 0
        best_acc = 0
        train_result, test_result = [], []

    # optimizer
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=0.0001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    # Training
    start = time.time()

    for e in range(start_epoch, start_epoch + opt.epoch):
        train_result += train(model, train_loader, optimizer, loss_func,
                              device, start_epoch, scheduler, e)
        test_result += test(model, test_loader, loss_func, device, start_epoch,
                            e)
        scheduler.step()

        # Save checkpoint
        if test_result[1::2][-1] > best_acc:
            print(f'Saving Model....({result_path})')
            state = {
                'model': model.state_dict(),
                'epoch': e + 1,
                'acc': test_result[1::2][-1],
                'train_result': train_result,
                'test_result': test_result
            }
            torch.save(state, f'{result_path}/{opt.model}_ckpt.pth')
            best_acc = test_result[1::2][-1]

        # Save Result
        if opt.save_result:
            print(f'Saving Result....({result_path})')
            save_result(train_result, test_result, result_path)

    end = time.time()
    with open(f'{result_path}/time_log.txt', 'w') as f:
        f.write(str(datetime.timedelta(seconds=end - start)))
Example No. 16
import os

import cv2
import numpy as np

from utils import make_folder
#list1
#label_list = ['skin', 'neck', 'hat', 'eye_g', 'hair', 'ear_r', 'neck_l', 'cloth', 'l_eye', 'r_eye', 'l_brow', 'r_brow', 'nose', 'l_ear', 'r_ear', 'mouth', 'u_lip', 'l_lip']
#list2
label_list = [
    'skin', 'nose', 'eye_g', 'l_eye', 'r_eye', 'l_brow', 'r_brow', 'l_ear',
    'r_ear', 'mouth', 'u_lip', 'l_lip', 'hair', 'hat', 'ear_r', 'neck_l',
    'neck', 'cloth'
]

folder_base = 'CelebAMaskHQ-mask-anno'
folder_save = 'CelebAMaskHQ-mask'
img_num = 30000

make_folder(folder_save)

for k in range(img_num):
    folder_num = k // 2000
    im_base = np.zeros((512, 512))
    for idx, label in enumerate(label_list):
        filename = os.path.join(folder_base, str(folder_num),
                                str(k).rjust(5, '0') + '_' + label + '.png')
        if os.path.exists(filename):
            print(label, idx + 1)
            im = cv2.imread(filename)
            im = im[:, :, 0]
            im_base[im != 0] = (idx + 1)

    filename_save = os.path.join(folder_save, str(k) + '.png')
    print(filename_save)
Example No. 17
    def __init__(self, config):

        # Images data path & Output path
        self.dataset = config.dataset
        self.data_path = config.data_path
        self.save_path = os.path.join(config.save_path, config.name)

        # Training settings
        self.batch_size = config.batch_size
        self.total_step = config.total_step
        self.d_steps_per_iter = config.d_steps_per_iter
        self.g_steps_per_iter = config.g_steps_per_iter
        self.d_lr = config.d_lr
        self.g_lr = config.g_lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.inst_noise_sigma = config.inst_noise_sigma
        self.inst_noise_sigma_iters = config.inst_noise_sigma_iters
        self.start = 0  # Unless using pre-trained model

        # Image transforms
        self.shuffle = not config.dont_shuffle
        self.drop_last = not config.dont_drop_last
        self.resize = not config.dont_resize
        self.imsize = config.imsize
        self.centercrop = config.centercrop
        self.centercrop_size = config.centercrop_size

        # Step size
        self.log_step = config.log_step
        self.sample_step = config.sample_step
        self.model_save_step = config.model_save_step
        self.save_n_images = config.save_n_images
        self.max_frames_per_gif = config.max_frames_per_gif

        # Pretrained model
        self.pretrained_model = config.pretrained_model

        # Misc
        self.manual_seed = config.manual_seed
        self.disable_cuda = config.disable_cuda
        self.parallel = config.parallel
        self.num_workers = config.num_workers

        # Output paths
        self.model_weights_path = os.path.join(self.save_path,
                                               config.model_weights_dir)
        self.sample_path = os.path.join(self.save_path, config.sample_dir)

        # Model hyper-parameters
        self.adv_loss = config.adv_loss
        self.z_dim = config.z_dim
        self.g_conv_dim = config.g_conv_dim
        self.d_conv_dim = config.d_conv_dim
        self.lambda_gp = config.lambda_gp

        # Model name
        self.name = config.name

        # Create directories if they do not exist
        utils.make_folder(self.save_path)
        utils.make_folder(self.model_weights_path)
        utils.make_folder(self.sample_path)

        # Copy files
        utils.write_config_to_file(config, self.save_path)
        utils.copy_scripts(self.save_path)

        # Make dataloader
        self.dataloader, self.num_of_classes = utils.make_dataloader(
            self.batch_size, self.dataset, self.data_path, self.shuffle,
            self.num_workers, self.drop_last, self.resize, self.imsize,
            self.centercrop, self.centercrop_size)

        # Data iterator
        self.data_iter = iter(self.dataloader)

        # Check for CUDA
        utils.check_for_CUDA(self)

        # Build G and D
        self.build_models()

        # Start with pretrained model (if it exists)
        if self.pretrained_model != '':
            utils.load_pretrained_model(self)

        if self.adv_loss == 'dcgan':
            self.criterion = nn.BCELoss()
Example No. 18
import options
import utils
from trainer import TrainerStage2

if __name__ == "__main__":

    print("=======================================================")
    print("Train structure generator  with joint 2D optimization from novel viewpoints")
    print("=======================================================")

    cfg = options.get_arguments()

    EXPERIMENT = f"{cfg.model}_{cfg.experiment}"
    MODEL_PATH = f"models/{EXPERIMENT}"
    LOG_PATH = f"logs/{EXPERIMENT}"

    utils.make_folder(MODEL_PATH)
    utils.make_folder(LOG_PATH)

    criterions = utils.define_losses()
    dataloaders = utils.make_data_novel(cfg)

    model = utils.build_structure_generator(cfg).to(cfg.device)
    optimizer = utils.make_optimizer(cfg, model)
    scheduler = utils.make_lr_scheduler(cfg, optimizer)

    logger = utils.make_logger(LOG_PATH)
    writer = utils.make_summary_writer(EXPERIMENT)

    def on_after_epoch(model, df_hist, images, epoch, saveEpoch):
        utils.save_best_model(MODEL_PATH, model, df_hist)
        utils.checkpoint_model(MODEL_PATH, model, epoch, saveEpoch)
Example No. 19
import os
import sys

import cv2
import pandas as pd

# make_folder is assumed to come from the project's local utils module
from utils import make_folder

assert len(sys.argv) == 2, "usage: python g_partition.py $CelebAMaskHQ_ROOT"
root = sys.argv[1]

LB_SIZE = (512, 512)

#### source data path
s_label = os.path.join(root, 'CelebAMask-HQ-mask')
s_img = os.path.join(root, 'CelebA-HQ-img')
#### destination training data path
d_train = os.path.join(root, 'train')
d_val = os.path.join(root, 'val')
d_test = os.path.join(root, 'test')

#### make folder
make_folder(d_train)
make_folder(d_val)
make_folder(d_test)

mapping = os.path.join(root, 'CelebA-HQ-to-CelebA-mapping.txt')
image_list = pd.read_csv(mapping,
                         delim_whitespace=True,
                         header=None,
                         dtype=int,
                         skiprows=1,
                         usecols=[0, 1])


def resize_and_write(idx, d, count):
    in_fn = os.path.join(s_img, str(idx) + '.jpg')
    this_image = cv2.imread(in_fn)