def save(dataset_name, write_separator=";"): """ Compute the mappings <relationship name -> list of types> for a specific dataset, and save them in a file. :param write_separator: the separator to use when writing the file :param dataset_name: the name of the dataset for which to compute the mappings """ dataset = datasets.Dataset(dataset_name) relation_2_types = compute(dataset) lines = [] for relation in sorted(dataset.relationships): lines.append( write_separator.join( [relation, ",".join(relation_2_types[relation])]) + "\n") output_filepath = os.path.join(dataset.home, FOLDER, FILENAME) print( "Writing the mappings <entity name -> in, out and overall degree> for %s training set in %s..." % (dataset_name, output_filepath)) with open(output_filepath, "w") as output_file: output_file.writelines(lines)
def save(dataset_name, write_separator=";"): """ Compute the mapping <relation name -> number of mentions> from the training set of a specific dataset and save it in a specific file in the home folder for that dataset :param dataset_name: the name of the dataset to compute and save the mappings for :param write_separator: the separator to use when writing the mappings """ dataset = datasets.Dataset(dataset_name) relation_2_mentions = compute(dataset) lines = [] for relation in relation_2_mentions: lines.append(write_separator.join([relation, str(relation_2_mentions[relation])]) + "\n") output_filepath = os.path.join(dataset.home, FOLDER, FILENAME) print("Writing the mapping <relation name -> number of mentions> for %s training set in %s..." % (dataset_name, output_filepath)) with open(output_filepath, "w") as output_file: output_file.writelines(lines) #save(datasets.FB15K) #save(datasets.WN18) #save(datasets.FB15K_237) #save(datasets.WN18RR) # save(datasets.YAGO3_10)
def __init__( self, *args, identifier: Identifier = None, dataset_fmt: str = None, **kwargs, ): logger.debug("Creating Dataset.") # Internally, keep track of data inside a dataset self._dataset = None # Set a default dataset_fmt if len(args) == 1 and isinstance(args[0], datasets.Dataset): # If no `dataset_fmt` is passed in with a datasets.Dataset, just assume # `dataset_fmt` should be 'datasets' dataset_fmt = "datasets" if dataset_fmt is None else dataset_fmt else: # Default to `in_memory`, unless a dataset_fmt is passed in explicitly dataset_fmt = "in_memory" if dataset_fmt is None else dataset_fmt # InMemoryDataset if dataset_fmt == "in_memory": if len(args) == 1 and isinstance(args[0], InMemoryDataset): # Assign the dataset directly self._dataset = args[0] else: # Assign the dataset after converting to an InMemoryDataset self._dataset = InMemoryDataset(*args, **kwargs) # datasets.Dataset elif dataset_fmt == "datasets": if len(args) == 1 and isinstance(args[0], datasets.Dataset): # Assign the dataset directly self._dataset = args[0] else: # Assign the dataset after converting to a datasets.Dataset self._dataset = datasets.Dataset(*args, **kwargs) else: raise NotImplementedError( "`dataset_fmt` must be one of ['in_memory', 'datasets'].") # Store the dataset format self._dataset_fmt = dataset_fmt # Call the InteractionTapeHierarchyMixin constructor InteractionTapeHierarchyMixin.__init__(self) # Create an identifier self._identifier = (self._autobuild_identifier() if not identifier else identifier) # Create logging directory self._create_logdir() # Add an index to the dataset if not self.has_index: self._add_index()
def save(dataset_name, write_separator=";"):
    """
    Compute the mappings
        <entity name -> in degree>
        <entity name -> out degree>
        <entity name -> overall degree>
    and save them in a file.

    :param dataset_name: the name of the dataset for which to compute the mappings
    :param write_separator: the separator to use when writing the file
    """
    dataset = datasets.Dataset(dataset_name)
    entity_in_degrees, entity_out_degrees, entity_degrees = compute(dataset)

    lines = []
    for entity in entity_degrees:
        lines.append(write_separator.join([entity,
                                           str(entity_in_degrees[entity]),
                                           str(entity_out_degrees[entity]),
                                           str(entity_degrees[entity])]) + "\n")

    output_filepath = os.path.join(dataset.home, FOLDER, FILENAME)
    print("Writing the mappings <entity name -> in, out and overall degree> for %s training set in %s..." %
          (dataset_name, output_filepath))
    with open(output_filepath, "w") as output_file:
        output_file.writelines(lines)
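# NOTE: hedged sketch, not the project's actual compute(): one plausible way to build the
# three mappings consumed by save() above. It assumes the Dataset object exposes an
# iterable of (head, relation, tail) training triples called `train_triples`; that
# attribute name is an assumption, not confirmed by the original code.
from collections import defaultdict

def compute_degrees_sketch(dataset):
    entity_in_degrees = defaultdict(int)
    entity_out_degrees = defaultdict(int)
    entity_degrees = defaultdict(int)

    for head, _relation, tail in dataset.train_triples:
        entity_out_degrees[head] += 1   # head gains an outgoing edge
        entity_in_degrees[tail] += 1    # tail gains an incoming edge
        entity_degrees[head] += 1
        entity_degrees[tail] += 1

    return entity_in_degrees, entity_out_degrees, entity_degrees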
def elmo_emb_dataset(task: str, split: str): dataset = ds.Dataset() if task == "secondary_structure": dataset = ds.SecondaryStructureDataset(SOURCE_DATA_PATH, split) elif task == "remote_homology": dataset = ds.RemoteHomologyDataset(SOURCE_DATA_PATH, split) elif task == "stability": dataset = ds.StabilityDataset(SOURCE_DATA_PATH, split) elif task == "fluorescence": dataset = ds.FluorescenceDataset(SOURCE_DATA_PATH, split) else: print("no data set for task " + task) return # set up file filename = task + "_" + split filepath = "./elmo/" + task + "/" + filename + ".p" os.makedirs(os.path.dirname(filepath), exist_ok=True) # do emb print("Embedding dataset " + filename) seq_dict = data_set_to_seq_dict(dataset) emb_dict = emb.get_embeddings(seq_dict, verbose=True) print('Writing embeddings to: {}'.format(filepath)) with open(filepath, 'wb') as f: pickle.dump(emb_dict, f)
def load_docs(dataset_name, word_vectors): return (datasets.Dataset(dataset_name, model_properties.MentionRankingProps(), word_vectors), zip( utils.load_pickle(directories.DOCUMENTS + dataset_name + '_docs.pkl'), utils.load_pickle(directories.ACTION_SPACE + dataset_name + '_action_space.pkl')))
def SetData(self, name, val, symerr=None, negerr=None, poserr=None): """Set dataset with name with values (and optionally errors).""" data = datasets.Dataset(val, symerr, negerr, poserr) op = operations.OperationDatasetSet(name, data) self.document.applyOperation(op) if self.verbose: print "Set dataset '%s':" % name print " Values = %s" % str(data.data) print " Symmetric errors = %s" % str(data.serr) print " Negative errors = %s" % str(data.nerr) print " Positive errors = %s" % str(data.perr)
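# NOTE: hedged usage sketch (not from the original file). Assuming `cmd` is an instance of
# the interface class that defines SetData above, built around a document, a dataset with
# symmetric errors could be registered as shown; the object name, constructor and dataset
# name are illustrative only, while the SetData signature is taken from the code above.
#
#   cmd = CommandInterface(document)    # hypothetical way of obtaining the interface
#   cmd.SetData('height', [1.0, 2.5, 3.1], symerr=[0.1, 0.1, 0.2])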
def do(self, document): """Create the dataset.""" OperationDatasetCreate.do(self, document) p = self.parts.copy() p['parametric'] = self.parametric ds = datasets.DatasetExpression(**p) ds.document = document if not self.link: # copy these values if we don't want to link ds = datasets.Dataset(data=ds.data, serr=ds.serr, perr=ds.perr, nerr=ds.nerr) document.setData(self.datasetname, ds) return ds
def setData(self, document, linkedfile=None): """Set the read-in datasets in the document.""" # iterate over each read-in dataset dsnames = [] for name in self.data.iterkeys(): # skip error data here, they are used below # error data name contains \0 if name.find('\0') >= 0: continue dsnames.append(name) # get data and errors (if any) data = [] for k in (name, name + '\0+-', name + '\0+', name + '\0-'): data.append(self.data.get(k, None)) # make them have a maximum length by adding NaNs maxlen = max([len(x) for x in data if x is not None]) for i in range(len(data)): if data[i] is not None and len(data[i]) < maxlen: data[i] = N.concatenate( (data[i], N.zeros(maxlen - len(data[i])) * N.nan)) # create dataset dstype = self.nametypes[name] if dstype == 'string': ds = datasets.DatasetText(data=data[0], linked=linkedfile) elif dstype == 'date': ds = datasets.DatasetDateTime(data=data[0], linked=linkedfile) else: ds = datasets.Dataset(data=data[0], serr=data[1], perr=data[2], nerr=data[3], linked=linkedfile) document.setData(name, ds) dsnames.sort() return dsnames
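# NOTE: standalone illustration (not part of the original module) of the NaN-padding step
# used in setData() above: shorter error columns are extended with NaN so every column
# matches the longest one before the Dataset is built. Assumes numpy imported as N, as in
# the surrounding code; the helper name is illustrative.
import numpy as N

def pad_with_nan_sketch(columns):
    maxlen = max(len(c) for c in columns if c is not None)
    padded = []
    for c in columns:
        if c is not None and len(c) < maxlen:
            c = N.concatenate((c, N.zeros(maxlen - len(c)) * N.nan))
        padded.append(c)
    return padded

# pad_with_nan_sketch([N.array([1.0, 2.0, 3.0]), N.array([0.1, 0.2]), None, None])
# extends the second column to [0.1, 0.2, nan] and leaves the None entries untouched.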
def main(): parser = argparse.ArgumentParser(description='Behavioral Cloning Training Program') parser.add_argument('-d', help='data directory', dest='data_dir', type=str, default='./data/drive_data.csv') parser.add_argument('-t', help='train size fraction', dest='train_size', type=float, default=0.8) parser.add_argument('-e', help='number of epochs', dest='nb_epoch', type=int, default=10) parser.add_argument('-b', help='batch size', dest='batch_size', type=int, default=64) parser.add_argument('-l', help='learning rate', dest='learning_rate', type=float, default=1.0e-4) args = parser.parse_args() args = vars(args) #print parameters print('-' * 30) print('Parameters') print('-' * 30) for key, value in args.items(): print('{:<20} := {}'.format(key, value)) print('-' * 30) # Set device if torch.cuda.is_available(): print('Using GPU !!!') device = torch.device("cuda:0") torch.backends.cudnn.benchmark = True else: print('Using CPU !!!') device = torch.device("cpu") # Create Dataset drivingData = datasets.Dataset(args['data_dir'], Transforms()) # Split Dataset train_size = int(len(drivingData) * args['train_size']) training_set, val_set = random_split(drivingData, [train_size, len(drivingData) - train_size]) trainLoader = DataLoader(training_set, batch_size=args["batch_size"], num_workers=3, shuffle=True) valLoader = DataLoader(val_set, batch_size=args["batch_size"], num_workers=3, shuffle=False) # Initialize Model model = Driver(batch_size=args['batch_size']) # Start Training train(model, device, lr=args['learning_rate'], epochs=args['nb_epoch'], trainingLoader=trainLoader, validationLoader=valLoader)
def train(self, args): noise = locate('noises.{0}'.format(args.noise))(args) dataset = datasets.Dataset(args) x_real = dataset.training_data[:args.n_test_data].copy() z_test = noise.test_data config_proto = tf.ConfigProto() config_proto.gpu_options.allow_growth = True with tf.Session(config=config_proto) as sess: sess.run(tf.global_variables_initializer()) for step in range(1, args.n_iters + 1): # Update discriminator x = dataset.get_training_data(args.batch_size) z = noise.sample(args.batch_size) D_loss, _ = sess.run( [self.D_loss, self.D_opt], { 'x_real:0': x, 'z_noise:0': z, 'train_D:0': True, 'train_G:0': True, }) # Update generator z = noise.sample(args.batch_size) G_loss, _ = sess.run([self.G_loss, self.G_opt], { 'z_noise:0': z, 'train_D:0': True, 'train_G:0': True, }) if step % args.log_interval == 0: print 'step:{0:>6}, Ld:{1:>9.6f}, Lg:{2:>9.6f}'.format( step, D_loss, G_loss) if step % args.plot_interval == 0: x_fake = self.sample_data(args, sess, z_test) plotutil.save_plot(args, step, x_real, x_fake, z_test)
def _import1d(self, hdu): """Import 1d data from hdu.""" data = hdu.data datav = None symv = None posv = None negv = None # read the columns required p = self.params if p.datacol is not None: datav = data.field(p.datacol) if p.symerrcol is not None: symv = data.field(p.symerrcol) if p.poserrcol is not None: posv = data.field(p.poserrcol) if p.negerrcol is not None: negv = data.field(p.negerrcol) # actually create the dataset return datasets.Dataset(data=datav, serr=symv, perr=posv, nerr=negv)
def do(self, document): """Create dataset using range.""" OperationDatasetCreate.do(self, document) data = self.parts['data'] serr = self.parts.get('serr', None) perr = self.parts.get('perr', None) nerr = self.parts.get('nerr', None) ds = datasets.DatasetRange(self.numsteps, data, serr=serr, perr=perr, nerr=nerr) if not self.linked: # copy these values if we don't want to link ds = datasets.Dataset(data=ds.data, serr=ds.serr, perr=ds.perr, nerr=ds.nerr) document.setData(self.datasetname, ds) return ds
def get_mask(self, geometry, shape): # create an ogr datasource driver = ogr.GetDriverByName('Memory') source = driver.CreateDataSource('') sr = osr.SpatialReference(self.projection) layer = source.CreateLayer('', sr) defn = layer.GetLayerDefn() feature = ogr.Feature(defn) feature.SetGeometry(geometry) layer.CreateFeature(feature) # burn where data should be mask = np.zeros(shape, dtype='u1') geo_transform = self.geo_transform.shifted(geometry) kwargs = { 'geo_transform': geo_transform, 'projection': self.projection } with datasets.Dataset(mask, **kwargs) as dataset: gdal.RasterizeLayer(dataset, (1, ), layer, burn_values=(1, )) return mask.astype('b1').repeat(3, axis=0)
def top_k_recommendation(file_with_user_ratings, test_og_df, k): name = 'ml_gui' data_utility_dir = '../datasets/ml-100k/utility-matrix/' results_folder = '../wnmf/' ds = datasets.TrainingAndTest('ml_gui') ds.training = datasets.Dataset( 'ml_gui', #name file_with_user_ratings, #original source data_utility_dir + name + '_um.csv', #utility matrix results_folder + name + '_', #similarity matrix 'ml', #data source 'wnmf', #algorithm 'wnmf', 2, #latent factors 26) #iterations #get ds.test = datasets.TestSet( str(name) + ' test set', #name 'gui_user_test.csv', 'ml') ds.build_ml_wnmf_predictions_df(results_folder + str(name) + '_wnmf_predictions.csv', cap=False) predictions = ds.test.predictions_df.copy() recommendations = predictions.sort_values('prediction', ascending=False).head(k) recommendations = list(recommendations['item']) movie_dict, titles = movie_titles.get_movie_info() recommendations = [movie_dict[i] for i in recommendations] #delete source files to ensure not reused os.remove('../wnmf/ml_gui__u.csv') os.remove('../wnmf/ml_gui__v.csv') os.remove('../wnmf/ml_gui_wnmf_predictions.csv') os.remove('../datasets/ml-100k/utility-matrix/ml_gui_um.csv') #returns list of top 5 return recommendations
def main():
    params = parser.Parser().get_arguments()
    prod = np.prod(params["image_dims"])
    print(params)

    # Get datasets
    if params["dataset"] == "mnist":
        params["image_dims"] = [32, 32, 1]
        x_train, x_test = datasets.Dataset(params).mnist()
        x_train = x_train.reshape([-1, 32, 32, 1])
        x_test = x_test.reshape([-1, 32, 32, 1])
    elif params["dataset"] == "cifar":
        params["image_dims"] = [32, 32, 3]
        x_train, x_test = datasets.Dataset(params).cifar()
        x_train = x_train.reshape([-1, 32, 32, 3])
        x_test = x_test.reshape([-1, 32, 32, 3])
    elif params["dataset"] == "stl10":
        params["image_dims"] = [96, 96, 3]
        if params["colors_output"] == "rgb":
            x_test = datasets.Dataset(params).stl10(colors="rgb")
            x_test = x_test.reshape([-1, 96, 96, 3])
        elif params["colors_output"] == "cbcr":
            x_test = datasets.Dataset(params).stl10(colors="ycbr")
            x_test = x_test.reshape([-1, 96, 96, 3])

    # Import test image
    test_image = x_test[:10]

    # Get noise
    adv_dict = {
        'psnr_input': 0,
        'rand-linf': 1,
        'rand-l2': 2,
        'linear-linf-1': 3,
        'linear-linf-10': 4,
        'linear-linf-20': 5,
        'linear-l2-1': 6,
        'linear-l2-10': 7,
        'linear-l2-20': 8,
        'quadratic-linf-1': 9,
        'quadratic-linf-10': 10,
        'quadratic-l2-1': 11,
        'quadratic-l2-10': 12,
        'linear-pixel-1': 13,
        'linear-pixel-10': 14,
        'quadratic-pixel-1': 15,
        'rand-pixel-1': 16,
        'rand-pixel-10': 17
    }
    img_mtx = {}

    if params["norm"] == "l2":
        # Epsilon
        epsilon_range = np.array(np.logspace(-0.5, 1.0, 5))  # for l2
        adv_mtx = np.zeros([len(epsilon_range), len(adv_dict)])

        # L2
        adv_mtx[:, adv_dict['psnr_input']] = -20. * np.log10(epsilon_range / np.sqrt(prod))  # only true for l2
        adv_mtx[:, adv_dict['rand-l2']], img_mtx['rand-l2'] = \
            adv_noise(test_image, params, epsilon_range, "rand", "l2", False)
        adv_mtx[:, adv_dict['linear-l2-1']], img_mtx['linear-l2-1'] = \
            adv_noise(test_image, params, epsilon_range, "linear", "l2", False, 1)
        adv_mtx[:, adv_dict['linear-l2-10']], img_mtx['linear-l2-10'] = \
            adv_noise(test_image, params, epsilon_range, "linear", "l2", False, 10)
        adv_mtx[:, adv_dict['linear-l2-20']], img_mtx['linear-l2-20'] = \
            adv_noise(test_image, params, epsilon_range, "linear", "l2", False, 20)
        if params['dataset'] == 'mnist' or params['dataset'] == 'cifar':
            adv_mtx[:, adv_dict['quadratic-l2-1']], img_mtx['quadratic-l2-1'] = \
                adv_noise(test_image, params, epsilon_range, "quadratic", "l2", False, 1)
            # adv_mtx[:, adv_dict['quadratic-l2-10']], img_mtx['quadratic-l2-10'] = \
            #     adv_noise(test_image, params, epsilon_range, "quadratic", "l2", True, 10)
            # helpers.save_image_fig(img_mtx, params, adv_mtx[:, adv_dict['psnr_input']],
            #                        ["rand-l2", 'linear-l2-1', 'linear-l2-10', 'linear-l2-20',
            #                         'quadratic-l2-1', 'quadratic-l2-10'],
            #                        "l2", img_size=params["image_dims"])
        else:
            print('No image plots for l2')
            # helpers.save_image_fig(img_mtx, params, adv_mtx[:, adv_dict['psnr_input']],
            #                        ["rand-l2", 'linear-l2-1', 'linear-l2-10', 'linear-l2-20'],
            #                        "l2", img_size=params["image_dims"])

    elif params["norm"] == "linf":
        # Epsilon
        epsilon_range = np.array(np.logspace(-2, -0.5, 5))  # for linf
        adv_mtx = np.zeros([len(epsilon_range), len(adv_dict)])

        # Linf
        adv_mtx[:, adv_dict['psnr_input']] = -20. * np.log10(epsilon_range)
        adv_mtx[:, adv_dict['rand-linf']], img_mtx['rand-linf'] = \
            adv_noise(test_image, params, epsilon_range, "rand", "linf", False)
        adv_mtx[:, adv_dict['linear-linf-1']], img_mtx['linear-linf-1'] = \
            adv_noise(test_image, params, epsilon_range, "linear", "linf", False, 1)
        adv_mtx[:, adv_dict['linear-linf-10']], img_mtx['linear-linf-10'] = \
            adv_noise(test_image, params, epsilon_range, "linear", "linf", False, 10)
        adv_mtx[:, adv_dict['linear-linf-20']], img_mtx['linear-linf-20'] = \
            adv_noise(test_image, params, epsilon_range, "linear", "linf", False, 20)
        helpers.save_image_fig(
            img_mtx, params, adv_mtx[:, adv_dict['psnr_input']],
            ["rand-linf", "linear-linf-1", "linear-linf-10", "linear-linf-20"],
            "linf", img_size=params["image_dims"])

    elif params["norm"] == "pixel":
        # Epsilon
        epsilon_range = np.array([0.1, 0.2, 0.3, 0.5, 0.7])
        adv_mtx = np.zeros([len(epsilon_range), len(adv_dict)])

        adv_mtx[:, adv_dict['psnr_input']] = epsilon_range
        adv_mtx[:, adv_dict['rand-pixel-1']], img_mtx['rand-pixel-1'] = \
            adv_noise(test_image, params, epsilon_range, "rand", "pixel", False, 1)
        adv_mtx[:, adv_dict['linear-pixel-1']], img_mtx['linear-pixel-1'] = \
            adv_noise(test_image, params, epsilon_range, "linear", "pixel", False, 1)
        adv_mtx[:, adv_dict['rand-pixel-10']], img_mtx['rand-pixel-10'] = \
            adv_noise(test_image, params, epsilon_range, "rand", "pixel", False, 100)
        adv_mtx[:, adv_dict['linear-pixel-10']], img_mtx['linear-pixel-10'] = \
            adv_noise(test_image, params, epsilon_range, "linear", "pixel", False, 100)
        helpers.save_image_fig(
            img_mtx, params, adv_mtx[:, adv_dict['psnr_input']],
            ['linear-pixel-1', 'linear-pixel-10', 'quadratic-pixel-1'],
            "pixel", img_size=params["image_dims"])

    np.savetxt(params['output_dir'] + '/summary/' + params['model'] + '_psnr_summary_' +
               params['dataset'] + '.csv', adv_mtx, delimiter=";")
    helpers.save_psnr_fig(adv_mtx,
                          './results/images/' + params['figs_dir'] + '/' + params['model'] +
                          '_fig_' + params['dataset'] + '_' + params["norm"] + '.png',
                          adv_dict, legend=True)
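# NOTE: side sketch (not in the original script) spelling out the PSNR bookkeeping above.
# With pixel values normalised to [0, 1], PSNR = -10 * log10(MSE). An l2 perturbation of
# norm epsilon spread over `prod` pixels has MSE = epsilon**2 / prod, which gives the
# -20 * log10(epsilon / sqrt(prod)) used in the l2 branch; the linf branch's
# -20 * log10(epsilon) corresponds to the worst case where every pixel shifts by epsilon.
import numpy as np

def psnr_from_l2_eps(epsilon, n_pixels):
    return -20.0 * np.log10(epsilon / np.sqrt(n_pixels))

def psnr_from_linf_eps(epsilon):
    return -20.0 * np.log10(epsilon)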
def generate_100B_dataset(num_examples: int, chunk_size: int) -> datasets.Dataset:
    table = pa.Table.from_pydict({"col": [0] * chunk_size})
    table = pa.concat_tables([table] * (num_examples // chunk_size))
    return datasets.Dataset(table, fingerprint="table_100B")
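# NOTE: hedged usage sketch (not in the original file). generate_100B_dataset() repeats a
# `chunk_size`-row Arrow chunk, so `num_examples` is assumed to be a multiple of
# `chunk_size` (the integer division drops any remainder). A small smoke test under that
# assumption:
#
#   small = generate_100B_dataset(num_examples=1_000, chunk_size=100)
#   assert len(small) == 1_000
#   assert small[0] == {"col": 0}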
def run_test(data_source, test_source, name, data, algo, sim, latent_factors, iterations,
             rebuild=False, sklearn_wnmf=False, elementwise=False):
    t1 = time.time()

    results_folder = str(algo)
    data_utility_dir = None
    filetype = None

    if data == 'ml':
        data_utility_dir = 'datasets/ml-100k/utility-matrix/'
        filetype = 'csv'
        ds = datasets.TrainingAndTest(data + ' training/test sets')
        ds.training = datasets.Dataset(
            name,          # name
            data_source,   # original source
            data_utility_dir + str(name) + '_' + str(algo) + '_um.' + filetype,  # utility matrix
            results_folder + str(name) + '_' + str(algo) + '_' + str(sim) + '_sm.' + filetype,
            # similarity matrix (or u and v matrices filename after u_ and v_ respectively)
            data,          # data source
            algo,          # algorithm
            sim,
            latent_factors,
            iterations,
            rebuild_files=rebuild,
            sklearn=sklearn_wnmf,
            elwise=elementwise)
        ds.test = datasets.TestSet(
            str(name) + ' test set',  # name
            test_source,
            data)
    elif data == 'yelp':
        data_utility_dir = None
        filetype = None
        um_location = None
        if algo == 'wnmf':
            data_utility_dir = 'datasets/yelp_dataset/'
            filetype = 'csv'
            um_location = data_source
        else:
            data_utility_dir = 'datasets/yelp_dataset/utility-matrix/'
            filetype = 'json'
            um_location = data_utility_dir + 'yelp_review_uc_training_um' + str(algo) + '_um.' + filetype
        ds = datasets.TrainingAndTest(data + ' training/test sets',
                                      latent_factors=latent_factors,
                                      iterations=iterations)
        ds.training = datasets.Dataset(
            name,          # name
            data_source,   # original source
            um_location,   # utility matrix
            results_folder + str(name) + '_' + str(algo) + '_' + str(sim) + '_sm.' + filetype,
            # similarity matrix (or u and v matrices filename after u_ and v_ respectively)
            data,          # data source
            algo,          # algorithm
            sim,
            latent_factors,
            iterations)
        ds.test = datasets.TestSet(
            str(name) + ' test set',  # name
            test_source,
            data)

    if algo == 'user' or algo == 'item':
        results_folder += '_similarity/'
    elif algo == 'wnmf':
        results_folder += '/'

    if data == 'ml':
        if algo == 'item':
            ds.build_ml_item_predictions_df('item_similarity/' + str(name) + '_' + str(algo) + '_' +
                                            str(sim) + '_predictions.csv', rebuild=True)
        elif algo == 'user':
            ds.build_ml_user_predictions_df('user_similarity/' + str(name) + '_' + str(algo) + '_' +
                                            str(sim) + '_predictions.csv', rebuild=True)
        elif algo == 'wnmf':
            if sklearn_wnmf:
                ds.build_ml_wnmf_predictions_df('wnmf/' + str(name) + '_' + str(algo) + '_' +
                                                str(sim) + '_predictions.csv', sklearn=True)
            else:
                ds.build_ml_wnmf_predictions_df('wnmf/' + str(name) + '_' + str(algo) + '_' +
                                                str(sim) + '_predictions.csv')
    elif data == 'yelp':
        if algo == 'item':
            ds.build_yelp_item_predictions_df('item_similarity/' + str(name) + '_' + str(algo) + '_' +
                                              str(sim) + '_predictions.csv', rebuild=True)
        elif algo == 'user':
            ds.build_yelp_user_predictions_df('user_similarity/' + str(name) + '_' + str(algo) + '_' +
                                              str(sim) + '_predictions.csv', rebuild=True)
        elif algo == 'wnmf':
            ds.build_yelp_wnmf_predictions_df('wnmf/' + str(name) + '_' + str(algo) + '_' +
                                              str(sim) + '_predictions.csv')

    print('Predictions: ')
    print(ds.test.predictions_df)

    if data == 'ml':
        ds.test.calculate_ml_mae()
        ds.test.calculate_ml_rmse()
    elif data == 'yelp':
        ds.test.calculate_yelp_mae()
        ds.test.calculate_yelp_rmse()

    print("MAE: " + str(ds.test.mae))
    print("RMSE: " + str(ds.test.rmse))
    print('Run time: ' + str(time.time() - t1) + ' sec')

    log_entry = str(t1) + ',' + name + ',' + data + ',' + algo + ',' + sim + ',' + \
        str(latent_factors) + ',' + str(ds.training.predictor_log) + ',' + \
        str(time.time() - t1) + ',' + str(ds.test.mae) + ',' + str(ds.test.rmse) + ',\n'

    return ds, log_entry
args=(i, head, rel, tail, dataset, entity_2_train_facts, entity_pair_2_train_facts, all_paths, all_relations, relation_2_tfidf_vec, path_2_df, return_dict)) process_list.append(p) evaluation_fact_index += 1 p.start() for p in process_list: p.join() end = time.time() print(end - start) batch_lines = [] for key in return_dict.keys(): batch_lines.append(";".join([ return_dict[key]["head"], return_dict[key]["relation"], return_dict[key]["tail"], str(return_dict[key]["head_rank"]), str(return_dict[key]["head_ties"]), str(return_dict[key]["tail_rank"]), str(return_dict[key]["tail_ties"]) ]) + "\n") print(return_dict[key]) with open("results.csv", "a") as outfile: outfile.writelines(batch_lines) compute(datasets.Dataset(datasets.FB15K))
import datasets
from dataset_analysis.degrees import relation_mentions
from dataset_analysis.paths import relation_paths

rel_2_mentions = relation_mentions.read(datasets.FB15K)
rel_2_paths_counts = relation_paths.read(datasets.Dataset(datasets.FB15K))

rel_2_mentions_items = sorted(rel_2_mentions.items(), key=lambda x: x[1], reverse=True)

for relation_and_mentions in rel_2_mentions_items[0:1]:
    relation = relation_and_mentions[0]
    mentions = relation_and_mentions[1]

    print(relation)
    print("Mentions: " + str(mentions))

    path_2_count = rel_2_paths_counts[relation]
    for path in path_2_count:
        print("\t" + path + ":" + str(path_2_count[path]))
    print()
result_dir = project_dir + 'Result/' train_file = 'train_list.txt' test_file = 'test_list.txt' result_file = 'result.txt' input_size = 64 batch_size = 20 max_epochs = 25 class_num = 66 if not os.path.exists(result_dir): os.makedirs(result_dir) if not os.path.exists(train_dir): os.makedirs(train_dir) dataset = datasets.Dataset(train_dir, train_file, test_dir, test_file, result_dir, result_file, batch_size=batch_size, input_size=input_size) net = models.ResNet50(dataset, class_num) net.train(model_file, max_epoches=max_epochs, load_weight=True, should_train=False) net.test()
def doImport(self, document):
    """Do import."""
    pluginnames = [p.name for p in plugins.importpluginregistry]
    plugin = plugins.importpluginregistry[pluginnames.index(self.params.plugin)]

    # if the plugin is a class, make an instance
    # the old API is for the plugin to be instances
    if isinstance(plugin, type):
        plugin = plugin()

    # strip out parameters for plugin itself
    p = self.params

    # stick back together the plugin parameter object
    plugparams = plugins.ImportPluginParams(p.filename, p.encoding, p.pluginpars)
    results = plugin.doImport(plugparams)

    # make link for file
    LF = None
    if p.linked:
        LF = linked.LinkedFilePlugin(p)

    customs = []

    # convert results to real datasets
    names = []
    for d in results:
        if isinstance(d, plugins.Dataset1D):
            ds = datasets.Dataset(data=d.data, serr=d.serr, perr=d.perr, nerr=d.nerr)
        elif isinstance(d, plugins.Dataset2D):
            ds = datasets.Dataset2D(data=d.data, xrange=d.rangex, yrange=d.rangey)
        elif isinstance(d, plugins.DatasetText):
            ds = datasets.DatasetText(data=d.data)
        elif isinstance(d, plugins.DatasetDateTime):
            ds = datasets.DatasetDateTime(data=d.data)
        elif isinstance(d, plugins.Constant):
            customs.append(['constant', d.name, d.val])
            continue
        elif isinstance(d, plugins.Function):
            customs.append(['function', d.name, d.val])
            continue
        else:
            raise RuntimeError("Invalid data set in plugin results")

        # set any linking (only when a linked file was actually created)
        if LF is not None:
            ds.linked = LF

        # construct name
        name = p.prefix + d.name + p.suffix

        # actually make dataset
        document.setData(name, ds)
        names.append(name)

    # add constants, functions to doc, if any
    self.addCustoms(document, customs)

    self.outdatasets = names
    self.outcustoms = list(customs)
def _import1dimage(self, hdu):
    """Import 1d image data from hdu."""
    return datasets.Dataset(data=hdu.data)
(dataset.name, output_filepath)) with open(output_filepath, "w") as output_file: output_file.writelines(lines) def read(dataset): test_fact_2_support = dict() input_filepath = os.path.join(dataset.home, FOLDER, TEST_FACT_2_SUPPORT_FILENAME) print( "Reading relation paths support for each test fact in dataset %s from location %s" % (dataset.name, input_filepath)) with open(input_filepath, "r") as input_file: for line in input_file: line = html.unescape(line) head, rel, tail, support_str = line.strip().split(SEPARATOR) support = float(support_str) test_fact_2_support[SEPARATOR.join([head, rel, tail])] = support return test_fact_2_support #save(datasets.Dataset(datasets.FB15K)) #save(datasets.Dataset(datasets.FB15K_237)) #save(datasets.Dataset(datasets.WN18)) save(datasets.Dataset(datasets.WN18RR)) #save(datasets.Dataset(datasets.YAGO3_10))
import datasets as dt
import classifiers as clf
import preprocess as pr
import numpy as np
import pandas as pd

if __name__ == '__main__':
    lstm_cv = False
    np.random.seed(123)

    # Load in data objects
    blur = dt.Dataset('data/Blur_lyrics.pickle', 'Blur', True)
    oasis = dt.Dataset('data/Oasis_lyrics.pickle', 'Oasis')

    analysis = pr.Analyser(blur, oasis, 0)
    analysis.get_summaries()
    analysis.train_test()
    analysis.get_tfidf()

    # # Determine optimal number of trees
    # print('Random Forest:')
    # tree_count = clf.test_random_forest(analysis, 50, 10, True)
    # print('Optimal Tree Count: {}.'.format(tree_count))

    results = []
    for i in np.round(np.linspace(0, 0.9, 10), 1).tolist():
        print('Noise Amount: {}'.format(i))

        # Preprocess
        analysis = pr.Analyser(blur, oasis, i)
        # analysis.get_summaries()
        analysis.train_test()
        analysis.get_tfidf()
train_fact_to_two_step_paths = _read_two_step_paths_from_file(input_filepath) input_filepath = os.path.join(dataset.home, FOLDER, TRAIN_FACTS_WITH_THREE_STEP_GRAPH_PATHS_FILENAME) print("Reading three-step graph paths for train facts of dataset %s into location %s" % (dataset.name, input_filepath)) train_fact_to_three_step_paths = _read_three_step_paths_from_file(input_filepath) return train_fact_to_one_step_paths, train_fact_to_two_step_paths, train_fact_to_three_step_paths def read_test(dataset): input_filepath = os.path.join(dataset.home, FOLDER, TEST_FACTS_WITH_ONE_STEP_GRAPH_PATHS_FILENAME) print("Reading one-step graph paths for test facts of dataset %s from location %s" % (dataset.name, input_filepath)) test_fact_to_one_step_paths = _read_one_step_paths_from_file(input_filepath) input_filepath = os.path.join(dataset.home, FOLDER, TEST_FACTS_WITH_TWO_STEP_GRAPH_PATHS_FILENAME) print("Reading two-step graph paths for test facts of dataset %s from location %s" % (dataset.name, input_filepath)) test_fact_to_two_step_paths = _read_two_step_paths_from_file(input_filepath) input_filepath = os.path.join(dataset.home, FOLDER, TEST_FACTS_WITH_THREE_STEP_GRAPH_PATHS_FILENAME) print("Reading three-step graph paths for test facts of dataset %s into location %s" % (dataset.name, input_filepath)) test_fact_to_three_step_paths = _read_three_step_paths_from_file(input_filepath) return test_fact_to_one_step_paths, test_fact_to_two_step_paths, test_fact_to_three_step_paths compute_and_progressively_save(datasets.Dataset(datasets.FB15K)) #compute_and_progressively_save(datasets.Dataset(datasets.FB15K_237)) #compute_and_progressively_save(datasets.Dataset(datasets.WN18)) #compute_and_progressively_save(datasets.Dataset(datasets.WN18RR)) #compute_and_progressively_save(datasets.Dataset(datasets.YAGO3_10))
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 24 19:08:25 2019

@author: jonathan
"""
import numpy as np
import pandas as pd
import math
import sys
sys.path.insert(0, '../../')
import os
import datasets

temp = datasets.Dataset()


def build_item_pearson_sim(dataset=temp, source_filename='not_entered', dest_filename=None):
    # if a Dataset object is passed as a parameter
    if dataset is not temp and source_filename == 'not_entered':
        source_filename = dataset.item_utility_source
        if dataset.item_utility_df is None:
            dataset.build_item_utility_df()

    # if utility matrix not built yet
    if dataset.item_utility_df is None and source_filename == 'not_entered':
        print("You must provide either an object containing a source utility matrix, "
              "or the location of the source itself")
    else:
        utility_np = dataset.item_utility_df.to_numpy()
        similarity_np = np.zeros((len(utility_np[0]), len(utility_np[0])), dtype=float)

        # PEARSON CORRELATION FUNCTION
        def pearson_corr(col1, col2):
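# NOTE: the body of pearson_corr is cut off above. The standalone function below is a
# hedged sketch of a standard Pearson correlation restricted to co-rated items (entries
# that are non-zero in both columns), a common convention for ratings utility matrices.
# It is an assumption about the intended behaviour, not the original implementation.
def pearson_corr_sketch(col1, col2):
    col1 = np.asarray(col1, dtype=float)
    col2 = np.asarray(col2, dtype=float)
    corated = (col1 != 0) & (col2 != 0)        # items rated in both columns
    if corated.sum() < 2:
        return 0.0
    a = col1[corated] - col1[corated].mean()
    b = col2[corated] - col2[corated].mean()
    denom = np.sqrt((a ** 2).sum()) * np.sqrt((b ** 2).sum())
    return float((a * b).sum() / denom) if denom != 0 else 0.0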
def main(cfg):
    if cfg.training.resume is not None:
        log_dir = cfg.training.log_dir
        checkpoint_dir = os.path.dirname(cfg.training.resume)
    else:
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S.%f')
        log_dir = os.path.join(cfg.training.logs_dir, '{}_{}'.format(timestamp, cfg.training.experiment_name))
        checkpoint_dir = os.path.join(cfg.training.checkpoints_dir,
                                      '{}_{}'.format(timestamp, cfg.training.experiment_name))
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    print('log_dir: {}'.format(log_dir))
    print('checkpoint_dir: {}'.format(checkpoint_dir))

    single_model = models.DRNSeg(cfg.arch, cfg.data.classes, None, pretrained=True)
    model = torch.nn.DataParallel(single_model).cuda()
    cudnn.benchmark = True

    criterion = nn.NLLLoss().cuda()
    optimizer = torch.optim.SGD(single_model.optim_parameters(),
                                cfg.optimizer.lr,
                                momentum=cfg.optimizer.momentum,
                                weight_decay=cfg.optimizer.weight_decay)

    start_epoch = 0
    if cfg.training.resume is not None:
        if os.path.isfile(cfg.training.resume):
            print("=> loading checkpoint '{}'".format(cfg.training.resume))
            checkpoint = torch.load(cfg.training.resume)
            start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(cfg.training.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(cfg.training.resume))

    crop_transform = transforms.CropTransform(shape=(640, 480))
    zoom_generator = transforms.RandomIntGenerator(480, 540)
    zoom_bilinear_transform = transforms.ZoomTransform(interpolation="bilinear", generator=zoom_generator)
    zoom_nearest_transform = transforms.ZoomTransform(interpolation="nearest", generator=zoom_generator)
    rotate_freq_generator = transforms.RandomFloatGenerator()
    rotate_angle_generator = transforms.RandomFloatGenerator()
    rotate_bilinear_transform = transforms.FrequencyTransform(
        freq=0.5,
        transform=transforms.RotateTransform(interpolation="bilinear", generator=rotate_angle_generator),
        generator=rotate_freq_generator
    )
    rotate_nearest_transform = transforms.FrequencyTransform(
        freq=0.5,
        transform=transforms.RotateTransform(interpolation="nearest", generator=rotate_angle_generator),
        generator=rotate_freq_generator
    )
    brightness_generator = transforms.RandomFloatGenerator()
    gamma_transform = transforms.BrightnessTransform(0.5, 1.5, brightness_generator)

    train_image_transforms = (zoom_bilinear_transform, rotate_bilinear_transform, crop_transform,
                              gamma_transform, transforms.ToTensorTransform(torch.FloatTensor))
    label_transforms = (zoom_nearest_transform, rotate_nearest_transform, crop_transform,
                        transforms.ToTensorTransform(torch.LongTensor))
    train_transforms = transforms.ParallelTransform([train_image_transforms, label_transforms])
    val_transforms = transforms.Compose([transforms.ToTensor()])

    if cfg.data.train_all:
        train_dataset = datasets.Dataset(cfg.data.root, cfg.data.ann_file, 'train', train_transforms)
    else:
        train_dataset = datasets.Dataset(cfg.data.root, 'train_' + cfg.data.ann_file, 'train', train_transforms)
    val_dataset = datasets.Dataset(cfg.data.root, 'val_' + cfg.data.ann_file, 'val', val_transforms)
    print(train_dataset.__len__())
    print(val_dataset.__len__())

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=cfg.data.batch_size, shuffle=True,
        num_workers=cfg.data.workers, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=cfg.data.batch_size, shuffle=True,
        num_workers=cfg.data.workers, pin_memory=True)

    train_summary_writer = SummaryWriter(log_dir=os.path.join(log_dir, 'train'))
    val_summary_writer = SummaryWriter(log_dir=os.path.join(log_dir, 'val'))
    visualization_summary_writer = SummaryWriter(log_dir=os.path.join(log_dir, 'visualization'))

    for epoch in range(start_epoch, cfg.training.epochs):
        lr = adjust_learning_rate(optimizer, epoch)
        train_summary_writer.add_scalar('learning_rate', lr, epoch + 1)

        train_batch_time, train_data_time, train_loss = train(train_loader, model, criterion, optimizer, epoch)
        train_summary_writer.add_scalar('batch_time', train_batch_time, epoch + 1)
        train_summary_writer.add_scalar('data_time', train_data_time, epoch + 1)
        train_summary_writer.add_scalar('loss', train_loss, epoch + 1)

        val_batch_time, val_data_time, val_loss, val_accuracy, val_ious = validate(val_loader, model, criterion)
        val_summary_writer.add_scalar('batch_time', val_batch_time, epoch + 1)
        val_summary_writer.add_scalar('data_time', val_data_time, epoch + 1)
        val_summary_writer.add_scalar('loss', val_loss, epoch + 1)
        val_summary_writer.add_scalar('accuracy', val_accuracy, epoch + 1)
        for i, iou in enumerate(val_ious):
            if not np.isnan(iou) and iou != 0:
                val_summary_writer.add_scalar('iou_{}'.format(cfg.data.class_names[i]), iou, epoch + 1)

        first_input_batch, first_target_batch = next(iter(val_loader))
        rendered = visualize_batch(utils.visualize, model, first_input_batch, first_target_batch)
        visualization_summary_writer.add_image('segmentation',
                                               torch.from_numpy(rendered).permute(2, 0, 1), epoch + 1)

        if (epoch + 1) % cfg.training.checkpoint_epochs == 0:
            checkpoint_path = save_checkpoint(checkpoint_dir, {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, epoch + 1)
            cfg.training.log_dir = log_dir
            cfg.training.resume = checkpoint_path
            with open(os.path.join(log_dir, 'config.yml'), 'w') as f:
                f.write(cfg.toYAML())
video_rate = 3
conv = {'50Salads': 25, "JIGSAWS": 20, "MERL": 5, "GTEA": 25}[dataset]

# Which features for the given dataset
features = "SpatialCNN"
bg_class = 0 if dataset != "JIGSAWS" else None

if dataset == "50Salads":
    features = "SpatialCNN_" + granularity

# ------------------------------------------------------------------
# Evaluate using different filter lengths
if 1:
    # for conv in [5, 10, 15, 20]:
    # Initialize dataset loader & metrics
    data = datasets.Dataset(dataset, base_dir)
    trial_metrics = metrics.ComputeMetrics(overlap=.1, bg_class=bg_class)

    # Load data for each split
    for split in data.splits:
        if sensor_type == "video":
            feature_type = "A" if model_type != "SVM" else "X"
        else:
            feature_type = "S"

        X_train, y_train, X_test, y_test = data.load_split(features, split=split,
                                                           sample_rate=video_rate,
                                                           feature_type=feature_type)

        if trial_metrics.n_classes is None:
            trial_metrics.set_classes(data.n_classes)
max_change_angle = (2 * 3.14159) / 500 eye_sensor.position = ( eye_sensor.position[0] + random.gauss(1, .75), eye_sensor.position[1] + random.gauss(1, .75), ) eye_sensor.orientation += random.uniform(-max_change_angle, max_change_angle) eye_sensor.scale = 1 if __name__ == '__main__': eye = Eye() import datasets, random # data = datasets.Dataset('./datasets/small_items') data = datasets.Dataset('./datasets/textures') print("Num Images:", len(data)) data.shuffle() for z in range(len(data)): eye.reset() data.next_image() img_path = data.current_image print("Loading image %s" % img_path) img = np.asarray(PIL.Image.open(img_path)) eye.new_image(img) eye.scale = 1 for i in range(10): sdr = eye.compute() eye.show_view() small_random_movement(eye)