def main():
    start_time = time.time()
    template_location = os.path.join(current_dir(), 'output-template')
    css_file = os.path.join(template_location, 'static', 'all.css')
    template_file = os.path.join(template_location, 'example.html')

    args = sys.argv[1:]
    if len(args) < 3:
        raise Exception(
            "This script should be run using: "
            "./run <destination.xml> <taxonomy.xml> /path/to/output_location")

    # create the output folder and move the static files into it
    output_location = args[2]
    static_location = os.path.join(output_location, 'static')
    make_folder(output_location)
    make_folder(static_location)
    shutil.copy2(css_file, static_location)
    print("copied css from %s to %s" % (css_file, static_location))

    try:
        taxonomies = process_taxonomies(args[1])
    except Exception:
        raise Exception("Taxonomies could not be processed :(")

    try:
        process_destinations(args[0], taxonomies, template_file, output_location)
    except Exception:
        raise Exception("Destinations could not be processed :(")

    print("\nexecution took: %0.2f seconds" % (time.time() - start_time))
    print("\nThank you for considering me for this position.")
    print("To see more amazing code like this be sure to send me through to the next round.")
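# NOTE: every snippet in this collection calls a make_folder helper that is never
# shown here. The sketch below is an assumption about what such a utility typically
# looks like, not any repository's actual utils.make_folder; some call sites pass a
# second argument (a version or subfolder name) and use the returned path, so the
# sketch supports both forms.
import os


def make_folder(path, subfolder=None):
    """Create a directory (and optional subdirectory) if it does not exist; return its path."""
    if subfolder is not None:
        path = os.path.join(path, str(subfolder))
    os.makedirs(path, exist_ok=True)
    return path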
        # tail of a model's forward pass: global-average-pool and flatten to a feature vector
        feature = self.avg_pool(out)
        feature = feature.view(feature.shape[0], -1)
        # logits = self.classifier(feature)
        return feature


meanstd = {
    'cifar10': [(0.49139968, 0.48215841, 0.44653091),
                (0.24703223, 0.24348513, 0.26158784)],
    'svhn': [(0.4376821, 0.4437697, 0.47280442),
             (0.19803012, 0.20101562, 0.19703614)]
}

if __name__ == "__main__":
    make_folder(args.save_path)

    with open(args.index_path, 'r') as f:
        label_indices = [int(line.rstrip('\n')) for line in f]

    data_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(*meanstd[args.dataset])])

    if args.dataset == "cifar10":
        train_dataset = dsets.CIFAR10(root=args.data_path,
                                      train=True,
                                      download=True,
                                      transform=data_transform)
    elif args.dataset == "svhn":
        train_dataset = dsets.SVHN(root=args.data_path,
def process_mtbi_data_files(_years):
    start_year = _years[0]
    end_year = _years[-1]
    print("\n***** PROCESSING MTBI DATA FROM {} TO {} *****".format(
        start_year, end_year))

    year_folders = []
    for year in _years:
        year_folders.append(constants.INTERVIEW_FILES[year])

    fmli_pipe = utils.concat_data_for_type('fmli', year_folders,
                                           extract_files_path)
    mtbi_pipe = utils.concat_data_for_type('mtbi', year_folders,
                                           extract_files_path)

    # age_pipe = fmli_pipe['AGE_REF'].groupby(fmli_pipe['NEWID']).max()
    age_pipe = fmli_pipe.groupby('NEWID', as_index=False)['AGE_REF'].max()

    # Retain NEWID and FINLWT21 columns
    finl_wt_pipe = fmli_pipe[['NEWID', 'FINLWT21']]

    # Sum(FINLWT21) grouped by AGE_REF in 'fmli' files
    sum_finl_wt_pipe = fmli_pipe.groupby(['AGE_REF'],
                                         as_index=False)['FINLWT21'].sum()
    sum_finl_wt_pipe.rename(columns={'FINLWT21': 'SUM_FINLWT21'}, inplace=True)

    # Sum(COST) grouped by NEWID, UCC in 'mtbi' files
    monthly_age_spend_pipe = mtbi_pipe.groupby(['NEWID', 'UCC'],
                                               as_index=False)['COST'].sum()

    # Join AGE_REF by NEWID from 'fmli' to 'mtbi' files
    monthly_age_spend_pipe = pd.merge(monthly_age_spend_pipe,
                                      age_pipe,
                                      on='NEWID',
                                      how='left')
    monthly_age_spend_pipe.drop_duplicates(inplace=True)

    # Join FINLWT21 by NEWID from 'fmli' to 'mtbi' files
    monthly_age_spend_finl_wt_pipe = pd.merge(monthly_age_spend_pipe,
                                              finl_wt_pipe,
                                              on='NEWID',
                                              how='left')
    monthly_age_spend_finl_wt_pipe.drop_duplicates(inplace=True)

    # Sum(WT_COST) grouped by AGE_REF, UCC
    monthly_age_spend_finl_wt_pipe['WT_COST'] = monthly_age_spend_finl_wt_pipe[
        'FINLWT21'] * monthly_age_spend_finl_wt_pipe['COST']
    monthly_age_ucc_spend_pipe = monthly_age_spend_finl_wt_pipe.groupby(
        ['AGE_REF', 'UCC'], as_index=False)['WT_COST'].sum()

    # Join the denominator Sum(FINLWT21) grouped by AGE_REF from 'fmli' to the spend pipe
    monthly_age_ucc_spend_pipe = pd.merge(monthly_age_ucc_spend_pipe,
                                          sum_finl_wt_pipe,
                                          on='AGE_REF',
                                          how='left')
    monthly_age_ucc_spend_pipe.drop_duplicates(inplace=True)

    # Calculate the average spend by dividing Sum(FINLWT21 * COST) by the
    # denominator Sum(FINLWT21) grouped by AGE_REF
    monthly_age_ucc_spend_pipe['AVG_SPEND'] = (
        ((monthly_age_ucc_spend_pipe['WT_COST'] /
          monthly_age_ucc_spend_pipe['SUM_FINLWT21']) *
         YEAR_BUCKET_SIZE_MULTIPLIER) / config.YEAR_BUCKET_SIZE).round(2)
    monthly_age_ucc_spend_pipe.drop(columns='SUM_FINLWT21', inplace=True)

    # Keep rows with AGE_REF between the configured min and max (e.g. 20 to 80)
    monthly_age_ucc_spend_pipe = monthly_age_ucc_spend_pipe[
        (monthly_age_ucc_spend_pipe['AGE_REF'] >= config.AGE_THRESHOLDS['min'])
        & (monthly_age_ucc_spend_pipe['AGE_REF'] <= config.AGE_THRESHOLDS['max'])]
    monthly_age_ucc_spend_pipe['AGE_REF'] = monthly_age_ucc_spend_pipe[
        'AGE_REF'].astype(int)
    print(monthly_age_ucc_spend_pipe)

    # Export processed data
    utils.make_folder(config.EXPORT_FILES_PATH)
    export_file = os.path.join(
        config.EXPORT_FILES_PATH,
        "mtbi_avg_spend_intrvw_{}_to_{}.csv".format(start_year, end_year))
    monthly_age_ucc_spend_pipe.to_csv(export_file, index=False)
    print("Exporting data to {}".format(export_file))

    # Reshape and export processed data
    reshaped_data = monthly_age_ucc_spend_pipe.pivot(index='UCC',
                                                     columns='AGE_REF',
                                                     values='AVG_SPEND')
    # reset_index so UCC is a regular column again before merging on it
    reshaped_data = reshaped_data.reset_index()
    reshaped_data = pd.merge(reshaped_data, utils.ucc_pipe, on='UCC', how='left')
    reshaped_data.drop_duplicates(inplace=True)
    reshaped_data.rename(columns={'UCC_DESCRIPTION': 'UCC_DESCRIPTION_COPY'},
                         inplace=True)
    reshaped_data.insert(1, 'UCC_DESCRIPTION',
                         reshaped_data['UCC_DESCRIPTION_COPY'])
    reshaped_data.drop(columns='UCC_DESCRIPTION_COPY', inplace=True)
    reshaped_file = os.path.join(
        config.EXPORT_FILES_PATH,
        "mtbi_reshaped_avg_spend_intrvw_{}_to_{}.csv".format(
            start_year, end_year))
    reshaped_data.to_csv(reshaped_file, index=False)
def process_fmli_data_files(_years):
    start_year = _years[0]
    end_year = _years[-1]
    print("\n***** PROCESSING FMLI DATA FROM {} TO {} *****".format(
        start_year, end_year))

    year_folders = []
    for year in _years:
        year_folders.append(constants.INTERVIEW_FILES[year])

    fmli_pipe = utils.concat_data_for_type('fmli', year_folders,
                                           extract_files_path)
    expn_vars_dict = get_expense_vars_dict(fmli_pipe)

    # Sum(FINLWT21) grouped by AGE_REF in 'fmli' files
    finl_wt_pipe = fmli_pipe.groupby(['AGE_REF'],
                                     as_index=False)['FINLWT21'].sum()
    finl_wt_pipe.rename(columns={'FINLWT21': 'SUM_FINLWT21'}, inplace=True)
    # print(finl_wt_pipe)

    # Get the weighted spend
    for key, val in expn_vars_dict.items():
        fmli_pipe['WT_' + key] = fmli_pipe['FINLWT21'] * (fmli_pipe[val[0]] +
                                                          fmli_pipe[val[1]])

    # Sum(expn_vars) grouped by AGE_REF
    wt_expn_vars = [x for x in list(fmli_pipe) if x.startswith('WT_')]
    spend_pipe = fmli_pipe.groupby(
        ['AGE_REF'], as_index=False)[wt_expn_vars].sum().round(2)
    spend_pipe = pd.merge(spend_pipe, finl_wt_pipe, on='AGE_REF', how='left')
    spend_pipe.drop_duplicates(inplace=True)

    # Calculate the average spend by dividing weighted spend by the denominator
    # Sum(FINLWT21) grouped by AGE_REF
    for key, val in expn_vars_dict.items():
        spend_pipe[key] = (
            ((spend_pipe['WT_' + key] / spend_pipe['SUM_FINLWT21']) *
             YEAR_BUCKET_SIZE_MULTIPLIER) / config.YEAR_BUCKET_SIZE).round(2)
        spend_pipe.drop(columns='WT_' + key, inplace=True)
    spend_pipe.drop(columns='SUM_FINLWT21', inplace=True)

    # Filter rows with AGE_REF >= 20 and AGE_REF <= 80
    spend_pipe = spend_pipe[
        (spend_pipe['AGE_REF'] >= config.AGE_THRESHOLDS['min'])
        & (spend_pipe['AGE_REF'] <= config.AGE_THRESHOLDS['max'])]
    spend_pipe['AGE_REF'] = spend_pipe['AGE_REF'].astype(int)
    print(spend_pipe)

    # Export processed data
    utils.make_folder(config.EXPORT_FILES_PATH)
    export_file = os.path.join(
        config.EXPORT_FILES_PATH,
        "fmli_avg_spend_intrvw_{}_to_{}.csv".format(start_year, end_year))
    spend_pipe.to_csv(export_file, index=False)
    print("Exporting data to {}".format(export_file))

    # Reshape and export processed data
    reshaped_data = spend_pipe.set_index('AGE_REF')
    reshaped_data = reshaped_data.T
    reshaped_data.reset_index(drop=False, inplace=True)
    reshaped_data.rename(columns={'index': 'CAT_CODE'}, inplace=True)
    reshaped_data = pd.merge(reshaped_data,
                             utils.fmli_category_pipe,
                             on='CAT_CODE',
                             how='left')
    reshaped_data.drop_duplicates(inplace=True)
    reshaped_data.rename(columns={'CAT_DESCRIPTION': 'CAT_DESCRIPTION_COPY'},
                         inplace=True)
    reshaped_data.insert(1, 'CAT_DESCRIPTION',
                         reshaped_data['CAT_DESCRIPTION_COPY'])
    reshaped_data.drop(columns='CAT_DESCRIPTION_COPY', inplace=True)
    reshaped_file = os.path.join(
        config.EXPORT_FILES_PATH,
        "fmli_reshaped_avg_spend_intrvw_{}_to_{}.csv".format(
            start_year, end_year))
    reshaped_data.to_csv(reshaped_file, index=False)
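# NOTE: the processing functions above lean on utils.concat_data_for_type, which is
# not shown in this collection. The sketch below is only an assumption about how
# such a helper could stack one survey file type across year folders; the real
# helper may read fixed-width or .dta files, filter columns, or differ otherwise.
import glob
import os

import pandas as pd


def concat_data_for_type(file_type, year_folders, extract_files_path):
    """Read every <file_type>*.csv under the given year folders and stack them."""
    frames = []
    for folder in year_folders:
        pattern = os.path.join(extract_files_path, folder,
                               "{}*.csv".format(file_type))
        for path in glob.glob(pattern):
            frames.append(pd.read_csv(path))
    if not frames:
        raise FileNotFoundError(
            "no {} files found under {}".format(file_type, extract_files_path))
    return pd.concat(frames, ignore_index=True)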
def main(config):
    # For fast training
    cudnn.benchmark = True

    ##### Dataloader #####
    config.video_path = os.path.join(config.root_path, config.video_path)
    config.annotation_path = os.path.join(config.root_path,
                                          config.annotation_path)

    config.mean = get_mean(config.norm_value, dataset=config.mean_dataset)
    if config.no_mean_norm and not config.std_norm:
        norm_method = Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    elif not config.std_norm:
        norm_method = Normalize(config.mean, [1, 1, 1])
    # NOTE: norm_method is left undefined when config.std_norm is set; this path
    # expects std_norm to be disabled.

    config.scales = [config.initial_scale]
    for i in range(1, config.n_scales):
        config.scales.append(config.scales[-1] * config.scale_step)

    if config.train:
        assert config.train_crop in ['random', 'corner', 'center']
        if config.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(config.scales,
                                               config.sample_size)
        elif config.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(config.scales,
                                               config.sample_size)
        elif config.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(config.scales,
                                               config.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(config.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(config.n_frames)
        target_transform = ClassLabel()

        print("=" * 30, "\nLoading data...")
        training_data = get_training_set(config, spatial_transform,
                                         temporal_transform, target_transform)
        train_loader = torch.utils.data.DataLoader(
            training_data,
            batch_size=config.batch_size,
            shuffle=True,
            num_workers=config.num_workers,
            pin_memory=True)
    else:
        spatial_transform = Compose([
            Scale(config.sample_size),
            CenterCrop(config.sample_size),
            ToTensor(config.norm_value), norm_method
        ])
        temporal_transform = LoopPadding(config.n_frames)
        target_transform = ClassLabel()
        validation_data = get_validation_set(config, spatial_transform,
                                             temporal_transform,
                                             target_transform)
        val_loader = torch.utils.data.DataLoader(
            validation_data,
            batch_size=config.batch_size,
            shuffle=False,
            num_workers=config.num_workers,
            pin_memory=True)
    ##### End dataloader #####

    # Use Big-GAN dataset to test only
    # The random data is used in the trainer
    # Need to pre-process data and use the dataloader (above)
    # config.n_class = len(glob.glob(os.path.join(config.root_path, config.video_path)))

    ## Data loader
    print('number class:', config.n_class)
    # # Data loader
    # data_loader = Data_Loader(config.train, config.dataset, config.image_path, config.imsize,
    #                           config.batch_size, shuf=config.train)

    # Create directories if not exist
    make_folder(config.model_save_path, config.version)
    # make_folder(config.sample_path, config.version)
    make_folder(config.log_path, config.version)

    if config.train:
        if config.model == 'dvd-gan':
            trainer = Trainer(train_loader, config)
        else:
            # Fail early instead of calling .train() on None for unsupported models
            raise NotImplementedError(
                "Unsupported model: {}".format(config.model))
        trainer.train()
    else:
        tester = Tester(val_loader, config)
        tester.test()
def main(config):
    cudnn.benchmark = True

    print("Loading data...")
    data_loader = Data_Loader(config.train,
                              config.dataset,
                              config.imsize,
                              config.batch_size,
                              config.image_path,
                              shuf=config.train)
    print('Done.')

    # Create directories if these do not exist
    for _subdir in ['gen', 'gen_avg', 'gen_ema', 'gen_ema_slow']:
        make_folder(config.model_save_path, _subdir)
        make_folder(config.sample_path, _subdir)
    make_folder(config.log_path)
    make_folder(config.best_path)
    if config.backup_freq > 0:
        make_folder(config.bup_path)
    if config.dataset == 'imagenet' and config.fid_freq > 0:
        make_folder(config.metrics_path)

    # Train
    trainer = Trainer(data_loader.loader(), config)
    trainer.train()
# Dependencies
import pandas as pd
from tqdm import tqdm

from constants import LABELS_FILE, IMAGES_PATH, SAVE_PATH, IMAGE_SIZE, SAVE_CSV
from utils import make_folder, process_image, get_all_files

# Make the folder to save images
make_folder(SAVE_PATH)

# Read the labels
label_csv = pd.read_csv(LABELS_FILE)
labels = label_csv["count"].values
del label_csv

# Array to store all the data
dataset = []

# Read and process all images
images = get_all_files(IMAGES_PATH)
for index, image in tqdm(enumerate(images)):
    try:
        saved_path = process_image(IMAGES_PATH, image, SAVE_PATH,
                                   (IMAGE_SIZE, IMAGE_SIZE))
        dataset.append([saved_path, labels[index]])
    except Exception:
        # Skip images that fail to load or resize
        pass
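# NOTE: process_image is imported from utils above but not shown. The sketch below
# is a guess at what such a helper might do (read, resize, save, return the saved
# path), using OpenCV; the actual utils.process_image may use PIL or differ in
# signature and behavior.
import os

import cv2


def process_image(images_path, image_name, save_path, size):
    """Read an image, resize it to `size`, save it under save_path, return the saved path."""
    img = cv2.imread(os.path.join(images_path, image_name))
    if img is None:
        raise ValueError("could not read {}".format(image_name))
    img = cv2.resize(img, size)
    saved_path = os.path.join(save_path, image_name)
    cv2.imwrite(saved_path, img)
    return saved_path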
import options
import utils
from trainer import Validator

if __name__ == "__main__":
    print("=======================================================")
    print("Evaluate / generate 3D Point Cloud generation model.")
    print("=======================================================")

    cfg = options.get_arguments()
    cfg.batchSize = cfg.inputViewN
    # cfg.chunkSize = 50

    RESULTS_PATH = f"results/{cfg.model}_{cfg.experiment}"
    utils.make_folder(RESULTS_PATH)

    dataloaders = utils.make_data_fixed(cfg)
    test_dataset = dataloaders[1].dataset

    model = utils.build_structure_generator(cfg).to(cfg.device)

    validator = Validator(cfg, test_dataset)
    hist = validator.eval(model)
    hist.to_csv(f"{RESULTS_PATH}.csv", index=False)
def export_god_file(god_pipe, bucket_size, file_type):
    utils.make_folder(config.GOODNESS_OF_DATA_FOLDER_PATH)
    god_file_name = "{}_god_{}yrs.csv".format(file_type, bucket_size)
    god_file = os.path.join(config.GOODNESS_OF_DATA_FOLDER_PATH, god_file_name)
    god_pipe.to_csv(god_file, index=False)
    print("Exporting data to {}".format(god_file))
def export_goodness_of_fit_files(file_type, gof_pipe, part_file_name):
    utils.make_folder(config.GOODNESS_OF_FIT_FOLDER_PATH)
    gof_file_name = "{}_gof_{}.csv".format(file_type, part_file_name)
    gof_file = os.path.join(config.GOODNESS_OF_FIT_FOLDER_PATH, gof_file_name)
    gof_pipe.to_csv(gof_file, index=False)
    print("Exporting data to {}".format(gof_file))
def _get_state_data(self):
    """
    This function downloads the data from autoredistrict.org's ftp. After some
    minor cleaning, the data is saved as a geopandas DataFrame.

    NOTE: currently the shape files on autoredistrict's ftp are districts
    instead of precincts as before. Don't use wget.

    INPUT:
    ----------------------------------------------------------------------------
    self.state: string, postal ID for self.state and key to "self.states" dictionary
    wget: boolean (default=False), whether to download new shape files.

    OUTPUT:
    ----------------------------------------------------------------------------
    None, but DataFrame is pickled in ../Data-Files/<self.state> alongside the
    shape files.
    """
    # make folder if it doesn't already exist
    prefix = '../Data-Files/' + self.state
    utils.make_folder(prefix)

    # import shape files
    url = 'ftp://autoredistrict.org/pub/shapefiles2/' + self.state_name + '/2010/2012/vtd/tl*'
    if self.wget:
        # download the shape files into the state folder
        call(['wget', '-P', prefix, url])

    # read shape files into geopandas
    geo_path = glob(prefix + '/tl*.shp')[0]
    geo_df = geo.GeoDataFrame.from_file(geo_path)
    geo_df.CD_2010 = geo_df.CD_2010.astype(int)

    # drop totals and other non-precinct observations
    geo_df = geo_df[geo_df.CD_2010 >= 0]

    # -------------------------------------------------------------------------
    # ADJUST ASPECT RATIO HERE:
    # -------------------------------------------------------------------------
    geo_df = geo_df.to_crs(epsg=4267)
    # geo_df.geometry = geo_df.geometry.scale(xfact=1/100000, yfact=1/100000, zfact=1.0, origin=(0, 0))

    # simplify geometries for faster image rendering
    # a bigger number gives a smaller file size
    geo_df.geometry = geo_df.geometry.simplify(.01).buffer(.007)

    # add longitude and latitude
    lonlat = np.array([t.centroid.coords.xy for t in geo_df.geometry])
    geo_df['INTPTLON10'] = lonlat[:, 0]
    geo_df['INTPTLAT10'] = lonlat[:, 1]
    # -------------------------------------------------------------------------

    # make sure congressional districts are numbered starting at 0
    geo_df.CD_2010 -= geo_df.CD_2010.min()

    # correct a few curiosities
    if self.state in ['KY']:
        geo_df.drop([
            'POP_BLACK', 'POP_WHITE', 'POP_ASIAN', 'POP_HAWAII', 'POP_HISPAN',
            'POP_INDIAN', 'POP_MULTI', 'POP_OTHER', 'POP_TOTAL'
        ],
                    axis=1,
                    inplace=True)
        geo_df.rename(index=str,
                      columns={
                          'VAP_BLACK': 'POP_BLACK',
                          'VAP_WHITE': 'POP_WHITE',
                          'VAP_ASIAN': 'POP_ASIAN',
                          'VAP_HAWAII': 'POP_HAWAII',
                          'VAP_HISPAN': 'POP_HISPAN',
                          'VAP_INDIAN': 'POP_INDIAN',
                          'VAP_MULTI': 'POP_MULTI',
                          'VAP_OTHER': 'POP_OTHER',
                          'VAP_TOT': 'POP_TOTAL'
                      },
                      inplace=True)

    # percent black in each precinct, accounting for precincts with zero population
    geo_df['BLACK_PCT'] = np.maximum(
        geo_df['POP_BLACK'] / geo_df['POP_TOTAL'], 0)
    geo_df.loc[~np.isfinite(geo_df['POP_TOTAL']), 'BLACK_PCT'] = 0
    geo_df.fillna({'BLACK_PCT': 0}, inplace=True)

    # political breakdown
    geo_df['DEM'] = geo_df[['PRES04_DEM', 'PRES08_DEM', 'PRES12_DEM']].mean(axis=1)
    geo_df['REP'] = geo_df[['PRES04_REP', 'PRES08_REP', 'PRES12_REP']].mean(axis=1)
    geo_df['REP_PCT'] = geo_df['REP'] / (geo_df['DEM'] + geo_df['REP'])
    geo_df['DEM_PCT'] = geo_df['DEM'] / (geo_df['DEM'] + geo_df['REP'])

    # unpack multipolygons
    self.pcnct_df = geo_df  # utils.unpack_multipolygons(geo_df)

    # pickle dataframe for future use
    pickle.dump(self.pcnct_df,
                open(prefix + '/precinct_data.p', 'wb'),
                protocol=2)
def process_interview_data_files():
    fmli_pipe = concat_data_for_type("fmli")
    mtbi_pipe = concat_data_for_type("mtbi")

    # age_pipe = fmli_pipe['AGE_REF'].groupby(fmli_pipe['NEWID']).max()
    age_pipe = fmli_pipe.groupby('NEWID', as_index=False)['AGE_REF'].max()

    # Retain NEWID and FINLWT21 columns
    finl_wt_pipe = fmli_pipe[['NEWID', 'FINLWT21']]
    print(finl_wt_pipe)

    # Sum(FINLWT21) grouped by AGE_REF in 'fmli' files
    sum_finl_wt_pipe = fmli_pipe.groupby(['AGE_REF'],
                                         as_index=False)['FINLWT21'].sum()
    sum_finl_wt_pipe.rename(columns={'FINLWT21': 'SUM_FINLWT21'}, inplace=True)

    # Sum(COST) grouped by NEWID, UCC in 'mtbi' files
    monthly_age_spend_pipe = mtbi_pipe.groupby(['NEWID', 'UCC'],
                                               as_index=False)['COST'].sum()

    # Join AGE_REF by NEWID from 'fmli' to 'mtbi' files
    monthly_age_spend_pipe = pd.merge(monthly_age_spend_pipe,
                                      age_pipe,
                                      on='NEWID',
                                      how='left')
    monthly_age_spend_pipe.drop_duplicates(inplace=True)

    # Join FINLWT21 by NEWID from 'fmli' to 'mtbi' files
    monthly_age_spend_finl_wt_pipe = pd.merge(monthly_age_spend_pipe,
                                              finl_wt_pipe,
                                              on='NEWID',
                                              how='left')
    monthly_age_spend_finl_wt_pipe.drop_duplicates(inplace=True)

    # Sum(WT_COST) grouped by AGE_REF, UCC
    monthly_age_spend_finl_wt_pipe['WT_COST'] = \
        monthly_age_spend_finl_wt_pipe['FINLWT21'] * \
        monthly_age_spend_finl_wt_pipe['COST']
    monthly_age_ucc_spend_pipe = monthly_age_spend_finl_wt_pipe.groupby(
        ['AGE_REF', 'UCC'], as_index=False)['WT_COST'].sum()

    # Join the denominator Sum(FINLWT21) grouped by AGE_REF from 'fmli' to the spend pipe
    monthly_age_ucc_spend_pipe = pd.merge(monthly_age_ucc_spend_pipe,
                                          sum_finl_wt_pipe,
                                          on='AGE_REF',
                                          how='left')
    monthly_age_ucc_spend_pipe.drop_duplicates(inplace=True)

    # Divide Sum(FINLWT21 * COST) by the denominator Sum(FINLWT21) grouped by AGE_REF
    monthly_age_ucc_spend_pipe['AVG_SPEND'] = (
        (monthly_age_ucc_spend_pipe['WT_COST'] /
         monthly_age_ucc_spend_pipe['SUM_FINLWT21']) * 20).round(2)

    # Keep rows with AGE_REF between 20 and 80
    monthly_age_ucc_spend_pipe = monthly_age_ucc_spend_pipe[
        (monthly_age_ucc_spend_pipe['AGE_REF'] >= 20)
        & (monthly_age_ucc_spend_pipe['AGE_REF'] <= 80)]
    monthly_age_ucc_spend_pipe['AGE_REF'] = monthly_age_ucc_spend_pipe[
        'AGE_REF'].astype(int)
    print(monthly_age_ucc_spend_pipe)

    # Export processed data
    utils.make_folder(config.EXPORT_FILES_PATH)
    export_file = os.path.join(config.EXPORT_FILES_PATH,
                               "test_export_file.csv")
    monthly_age_ucc_spend_pipe.to_csv(export_file, index=False)
    print("Exporting data to {}".format(export_file))

    # Reshape and export processed data
    reshaped_data = monthly_age_ucc_spend_pipe.pivot(index='UCC',
                                                     columns='AGE_REF',
                                                     values='AVG_SPEND')
    # reset_index so UCC is a regular column again before merging on it
    reshaped_data = reshaped_data.reset_index()
    reshaped_data = pd.merge(reshaped_data, data_dict_pipe, on='UCC', how='left')
    reshaped_data.drop_duplicates(inplace=True)
    reshaped_data.insert(1, 'UCC_DESCRIPTION', reshaped_data['UCC_DESC'])
    reshaped_data.drop(columns='UCC_DESC', inplace=True)
    print(reshaped_data)

    reshaped_file = os.path.join(config.EXPORT_FILES_PATH,
                                 "test_reshaped_file.csv")
    reshaped_data.to_csv(reshaped_file, index=False)
def process_fmli_data_files():
    fmli_pipe = concat_data_for_type('fmli')

    expense_vars = [
        x for x in list(fmli_pipe) if x.endswith('PQ') or x.endswith('CQ')
    ]
    expense_vars_dict = {}
    for expense_var in expense_vars:
        expense_vars_dict[expense_var[0:-2]] = []
    for expense_var in expense_vars:
        expense_vars_dict.get(expense_var[0:-2]).append(expense_var)
    print(expense_vars_dict)

    # Sum(FINLWT21) grouped by AGE_REF in 'fmli' files
    finl_wt_pipe = fmli_pipe.groupby(['AGE_REF'],
                                     as_index=False)['FINLWT21'].sum()
    finl_wt_pipe.rename(columns={'FINLWT21': 'SUM_FINLWT21'}, inplace=True)
    # print(finl_wt_pipe)

    # Get the weighted spend
    for key, val in expense_vars_dict.items():
        if len(val) < 2:
            fmli_pipe['WT_' + key] = fmli_pipe['FINLWT21'] * fmli_pipe[val[0]]
        else:
            fmli_pipe['WT_' + key] = fmli_pipe['FINLWT21'] * (
                fmli_pipe[val[0]] + fmli_pipe[val[1]])

    # Sum(expense_vars) grouped by AGE_REF
    wt_expense_vars = [x for x in list(fmli_pipe) if x.startswith('WT_')]
    spend_pipe = fmli_pipe.groupby(
        ['AGE_REF'], as_index=False)[wt_expense_vars].sum().round(2)
    spend_pipe = pd.merge(spend_pipe, finl_wt_pipe, on='AGE_REF', how='left')
    spend_pipe.drop_duplicates(inplace=True)

    for key, val in expense_vars_dict.items():
        spend_pipe[key] = (
            (spend_pipe['WT_' + key] / spend_pipe['SUM_FINLWT21']) * 20).round(2)
        spend_pipe.drop(columns='WT_' + key, inplace=True)
    spend_pipe.drop(columns='SUM_FINLWT21', inplace=True)

    spend_pipe = spend_pipe[(spend_pipe['AGE_REF'] >= 20)
                            & (spend_pipe['AGE_REF'] <= 80)]
    spend_pipe['AGE_REF'] = spend_pipe['AGE_REF'].astype(int)
    print(spend_pipe)

    # Export processed data
    utils.make_folder(config.EXPORT_FILES_PATH)
    export_file = os.path.join(config.EXPORT_FILES_PATH,
                               "test_fmli_export_file.csv")
    spend_pipe.to_csv(export_file, index=False)
    print("Exporting data to {}".format(export_file))

    # Reshape and export processed data
    reshaped_data = spend_pipe.set_index('AGE_REF')
    reshaped_data = reshaped_data.T
    reshaped_data.reset_index(drop=False, inplace=True)
    reshaped_data.rename(columns={'index': 'CAT_CODE'}, inplace=True)
    reshaped_data = pd.merge(reshaped_data,
                             utils.fmli_category_pipe,
                             on='CAT_CODE',
                             how='left')
    reshaped_data.drop_duplicates(inplace=True)
    reshaped_data.rename(columns={'CAT_DESCRIPTION': 'CAT_DESCRIPTION_COPY'},
                         inplace=True)
    reshaped_data.insert(1, 'CAT_DESCRIPTION',
                         reshaped_data['CAT_DESCRIPTION_COPY'])
    reshaped_data.drop(columns='CAT_DESCRIPTION_COPY', inplace=True)
    print(reshaped_data)

    reshaped_file = os.path.join(config.EXPORT_FILES_PATH,
                                 "test_fmli_reshaped_file.csv")
    reshaped_data.to_csv(reshaped_file, index=False)
import options
import utils
from trainer import TrainerStage1

if __name__ == "__main__":
    print("=======================================================")
    print("Pretrain structure generator with fixed viewpoints")
    print("=======================================================")

    cfg = options.get_arguments()

    EXPERIMENT = f"{cfg.model}_{cfg.experiment}"
    MODEL_PATH = f"models/{EXPERIMENT}"
    LOG_PATH = f"logs/{EXPERIMENT}"
    utils.make_folder(MODEL_PATH)
    utils.make_folder(LOG_PATH)

    criterions = utils.define_losses()
    dataloaders = utils.make_data_fixed(cfg)

    model = utils.build_structure_generator(cfg).to(cfg.device)
    optimizer = utils.make_optimizer(cfg, model)
    scheduler = utils.make_lr_scheduler(cfg, optimizer)

    logger = utils.make_logger(LOG_PATH)
    writer = utils.make_summary_writer(EXPERIMENT)

    def on_after_epoch(model, df_hist, images, epoch):
        utils.save_best_model(MODEL_PATH, model, df_hist)
        utils.log_hist(logger, df_hist)
def main(opt):
    # make folder
    base_path = 'result'
    os.makedirs(base_path, exist_ok=True)
    result_path = make_folder(base_path, opt.save_folder)

    # Dataset
    print(f'Preparing Dataset....{opt.dataset}')
    train_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        # transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    test_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    train_set, test_set = get_dataset(opt.dataset, train_transform,
                                      test_transform)
    if opt.testing:
        train_set = Subset(train_set, range(opt.train_batch_size))
        test_set = Subset(test_set, range(opt.test_batch_size))

    # Load Dataset
    train_loader = DataLoader(train_set,
                              batch_size=opt.train_batch_size,
                              shuffle=True)
    test_loader = DataLoader(test_set,
                             batch_size=opt.test_batch_size,
                             shuffle=False)

    # GPU
    device = 'cuda' if (torch.cuda.is_available() and opt.cuda) else 'cpu'
    print(f'Using {device}')

    # model
    from torchvision.models import vgg16_bn  # (unused here; the model comes from get_model below)
    print(f'Preparing Model....{opt.model}')
    model = get_model(opt.model, opt.num_classes, pretrained=opt.pretrained)
    model.to(device)

    # Resuming
    if opt.resume:
        print('Resuming from checkpoint')
        assert os.path.isdir(f'{opt.resume}')
        checkpoint = torch.load(f'{opt.resume}/{opt.model}_ckpt.pth')
        model.load_state_dict(checkpoint['model'])
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
        train_result = checkpoint['train_result']
        test_result = checkpoint['test_result']
    else:
        start_epoch = 0
        best_acc = 0
        train_result, test_result = [], []

    # Optimizer
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=0.0001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    # Training
    start = time.time()
    for e in range(start_epoch, start_epoch + opt.epoch):
        train_result += train(model, train_loader, optimizer, loss_func,
                              device, start_epoch, scheduler, e)
        test_result += test(model, test_loader, loss_func, device,
                            start_epoch, e)
        scheduler.step()

        # Save checkpoint (test_result holds alternating loss/accuracy entries,
        # so test_result[1::2][-1] is the latest accuracy)
        if test_result[1::2][-1] > best_acc:
            print(f'Saving Model....({result_path})')
            state = {
                'model': model.state_dict(),
                'epoch': e + 1,
                'acc': test_result[1::2][-1],
                'train_result': train_result,
                'test_result': test_result
            }
            torch.save(state, f'{result_path}/{opt.model}_ckpt.pth')
            best_acc = test_result[1::2][-1]

    # Save Result
    if opt.save_result:
        print(f'Saving Result....({result_path})')
        save_result(train_result, test_result, result_path)

    end = time.time()
    with open(f'{result_path}/time_log.txt', 'w') as f:
        f.write(str(datetime.timedelta(seconds=end - start)))
import os

import cv2
import numpy as np

from utils import make_folder

# list1
# label_list = ['skin', 'neck', 'hat', 'eye_g', 'hair', 'ear_r', 'neck_l', 'cloth', 'l_eye', 'r_eye', 'l_brow', 'r_brow', 'nose', 'l_ear', 'r_ear', 'mouth', 'u_lip', 'l_lip']

# list2
label_list = [
    'skin', 'nose', 'eye_g', 'l_eye', 'r_eye', 'l_brow', 'r_brow', 'l_ear',
    'r_ear', 'mouth', 'u_lip', 'l_lip', 'hair', 'hat', 'ear_r', 'neck_l',
    'neck', 'cloth'
]

folder_base = 'CelebAMaskHQ-mask-anno'
folder_save = 'CelebAMaskHQ-mask'
img_num = 30000

make_folder(folder_save)

for k in range(img_num):
    folder_num = k // 2000
    im_base = np.zeros((512, 512))
    for idx, label in enumerate(label_list):
        filename = os.path.join(folder_base, str(folder_num),
                                str(k).rjust(5, '0') + '_' + label + '.png')
        if os.path.exists(filename):
            print(label, idx + 1)
            im = cv2.imread(filename)
            im = im[:, :, 0]
            im_base[im != 0] = (idx + 1)

    filename_save = os.path.join(folder_save, str(k) + '.png')
    print(filename_save)
    # write the combined label mask for this image
    cv2.imwrite(filename_save, im_base)
def __init__(self, config):
    # Images data path & Output path
    self.dataset = config.dataset
    self.data_path = config.data_path
    self.save_path = os.path.join(config.save_path, config.name)

    # Training settings
    self.batch_size = config.batch_size
    self.total_step = config.total_step
    self.d_steps_per_iter = config.d_steps_per_iter
    self.g_steps_per_iter = config.g_steps_per_iter
    self.d_lr = config.d_lr
    self.g_lr = config.g_lr
    self.beta1 = config.beta1
    self.beta2 = config.beta2
    self.inst_noise_sigma = config.inst_noise_sigma
    self.inst_noise_sigma_iters = config.inst_noise_sigma_iters
    self.start = 0  # Unless using pre-trained model

    # Image transforms
    self.shuffle = not config.dont_shuffle
    self.drop_last = not config.dont_drop_last
    self.resize = not config.dont_resize
    self.imsize = config.imsize
    self.centercrop = config.centercrop
    self.centercrop_size = config.centercrop_size

    # Step size
    self.log_step = config.log_step
    self.sample_step = config.sample_step
    self.model_save_step = config.model_save_step
    self.save_n_images = config.save_n_images
    self.max_frames_per_gif = config.max_frames_per_gif

    # Pretrained model
    self.pretrained_model = config.pretrained_model

    # Misc
    self.manual_seed = config.manual_seed
    self.disable_cuda = config.disable_cuda
    self.parallel = config.parallel
    self.num_workers = config.num_workers

    # Output paths
    self.model_weights_path = os.path.join(self.save_path,
                                           config.model_weights_dir)
    self.sample_path = os.path.join(self.save_path, config.sample_dir)

    # Model hyper-parameters
    self.adv_loss = config.adv_loss
    self.z_dim = config.z_dim
    self.g_conv_dim = config.g_conv_dim
    self.d_conv_dim = config.d_conv_dim
    self.lambda_gp = config.lambda_gp

    # Model name
    self.name = config.name

    # Create directories if not exist
    utils.make_folder(self.save_path)
    utils.make_folder(self.model_weights_path)
    utils.make_folder(self.sample_path)

    # Copy files
    utils.write_config_to_file(config, self.save_path)
    utils.copy_scripts(self.save_path)

    # Make dataloader
    self.dataloader, self.num_of_classes = utils.make_dataloader(
        self.batch_size, self.dataset, self.data_path, self.shuffle,
        self.num_workers, self.drop_last, self.resize, self.imsize,
        self.centercrop, self.centercrop_size)

    # Data iterator
    self.data_iter = iter(self.dataloader)

    # Check for CUDA
    utils.check_for_CUDA(self)

    # Build G and D
    self.build_models()

    # Start with pretrained model (if it exists)
    if self.pretrained_model != '':
        utils.load_pretrained_model(self)

    if self.adv_loss == 'dcgan':
        self.criterion = nn.BCELoss()
import options
import utils
from trainer import TrainerStage2

if __name__ == "__main__":
    print("=======================================================")
    print("Train structure generator with joint 2D optimization from novel viewpoints")
    print("=======================================================")

    cfg = options.get_arguments()

    EXPERIMENT = f"{cfg.model}_{cfg.experiment}"
    MODEL_PATH = f"models/{EXPERIMENT}"
    LOG_PATH = f"logs/{EXPERIMENT}"
    utils.make_folder(MODEL_PATH)
    utils.make_folder(LOG_PATH)

    criterions = utils.define_losses()
    dataloaders = utils.make_data_novel(cfg)

    model = utils.build_structure_generator(cfg).to(cfg.device)
    optimizer = utils.make_optimizer(cfg, model)
    scheduler = utils.make_lr_scheduler(cfg, optimizer)

    logger = utils.make_logger(LOG_PATH)
    writer = utils.make_summary_writer(EXPERIMENT)

    def on_after_epoch(model, df_hist, images, epoch, saveEpoch):
        utils.save_best_model(MODEL_PATH, model, df_hist)
        utils.checkpoint_model(MODEL_PATH, model, epoch, saveEpoch)
import os
import sys

import cv2
import pandas as pd

from utils import make_folder

assert len(sys.argv) == 2, "usage: python g_partition.py $CelebAMaskHQ_ROOT"
root = sys.argv[1]

LB_SIZE = (512, 512)

#### source data path
s_label = os.path.join(root, 'CelebAMask-HQ-mask')
s_img = os.path.join(root, 'CelebA-HQ-img')

#### destination training data path
d_train = os.path.join(root, 'train')
d_val = os.path.join(root, 'val')
d_test = os.path.join(root, 'test')

#### make folder
make_folder(d_train)
make_folder(d_val)
make_folder(d_test)

mapping = os.path.join(root, 'CelebA-HQ-to-CelebA-mapping.txt')
image_list = pd.read_csv(mapping,
                         delim_whitespace=True,
                         header=None,
                         dtype=int,
                         skiprows=1,
                         usecols=[0, 1])


def resize_and_write(idx, d, count):
    in_fn = os.path.join(s_img, str(idx) + '.jpg')
    this_image = cv2.imread(in_fn)