def make_hist(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:  # RDKit returns None for unparsable SMILES
        return None
    atoms = mol.GetAtoms()
    hist = np.zeros(utils.dataset_info(dataset)['hist_dim'])
    for atom in atoms:
        if dataset == 'qm9':
            atom_str = atom.GetSymbol()
        else:
            # zinc dataset: encode atoms in "<atom_symbol><valence>(<charge>)" notation
            symbol = atom.GetSymbol()
            valence = atom.GetTotalValence()
            charge = atom.GetFormalCharge()
            atom_str = "%s%i(%i)" % (symbol, valence, charge)
        if atom_str not in utils.dataset_info(dataset)['atom_types']:
            print('Unrecognized atom type %s' % atom_str)
            return None
        ind = utils.dataset_info(dataset)['atom_types'].index(atom_str)
        val = utils.dataset_info(dataset)['maximum_valence'][ind]
        hist[val - 1] += 1  # valences start at 1, array indices at 0
    return hist
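# Usage sketch (hypothetical values): `dataset` is read from module scope,
# so it must be set before calling make_hist, e.g.
#
#   dataset = 'zinc'
#   hist = make_hist('CCO')   # ethanol
#   if hist is not None:
#       print(hist)           # atom counts bucketed by maximum valence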
def __init__(self, args):
    self.args = args

    # Collect argument things:
    data_dir = ''
    if '--data_dir' in args and args['--data_dir'] is not None:
        data_dir = args['--data_dir']
    self.data_dir = data_dir

    # Collect parameters:
    params = self.default_params()
    config_file = args.get('--config-file')
    if config_file is not None:
        with open(config_file, 'r') as f:
            params.update(json.load(f))
    config = args.get('--config')
    if config is not None:
        params.update(json.loads(config))
    self.params = params

    # Get which dataset is in use
    self.params['dataset'] = dataset = args.get('--dataset')
    # Number of atom types in this dataset
    self.params['num_symbols'] = len(dataset_info(dataset)["atom_types"])

    self.run_id = "_".join([time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())])
    log_dir = args.get('--log_dir') or '.'
    self.log_file = os.path.join(log_dir, "%s_log_%s.json" % (self.run_id, dataset))
    self.best_model_file = os.path.join(log_dir, "%s_model.pickle" % self.run_id)
    with open(os.path.join(log_dir, "%s_params_%s.json" % (self.run_id, dataset)), "w") as f:
        json.dump(params, f)
    print("Run %s starting with following parameters:\n%s" % (self.run_id, json.dumps(self.params)))

    # Set random seeds
    random.seed(params['random_seed'])
    np.random.seed(params['random_seed'])

    # Load data:
    self.max_num_vertices = 0
    self.num_edge_types = 0
    self.annotation_size = 0
    self.train_data = self.load_data(params['train_file'], is_training_data=True)
    self.valid_data = self.load_data(params['valid_file'], is_training_data=False)

    # Build the actual model
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph, config=config)
    with self.graph.as_default():
        tf.set_random_seed(params['random_seed'])
        self.placeholders = {}
        self.weights = {}
        self.ops = {}
        self.make_model()
        self.make_train_step()

        # Restore/initialize variables:
        restore_file = args.get('--restore')
        if restore_file is not None:
            self.restore_model(restore_file)
        else:
            self.initialize_model()
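# The constructor expects a docopt-style argument dictionary. A minimal
# sketch (option names taken from the lookups above; the class name and
# values are hypothetical):
#
#   model = ChemModel({
#       '--dataset': 'qm9',
#       '--data_dir': 'data/',
#       '--config-file': None,
#       '--config': '{"random_seed": 0}',
#       '--log_dir': 'logs/',
#       '--restore': None,
#   })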
# Counters such as `nonplanar`, `atom_counter`, `atom_per_molecule`,
# `edge_type_counter`, `edge_type_per_molecule` and `shape_count` are
# accumulated earlier in this script.
total, tree_count = utils.check_cyclic(dataset)
sascorer, sa_score_per_molecule = utils.check_sascorer(dataset)
total, validity = utils.check_validity(dataset)

print("------------------------------------------")
print("Metrics")
print("------------------------------------------")
print("total molecules")
print(total)
print("------------------------------------------")
print("percentage of nonplanar:")
print(nonplanar / total)
print("------------------------------------------")
print("avg atoms:")
for atom_type, c in atom_counter.items():
    print(dataset_info(dataset)['atom_types'][atom_type])
    print(c / total)
print("standard deviation")
print(np.std(atom_per_molecule, axis=0))
print("------------------------------------------")
print("avg edge_type:")
for edge_type, c in edge_type_counter.items():
    print(edge_type + 1)  # edge types are 0-indexed; print as bond order
    print(c / total)
print("standard deviation")
print(np.std(edge_type_per_molecule, axis=0))
print("------------------------------------------")
print("avg shape:")
for shape, c in zip(utils.geometry_numbers, shape_count):
    print(shape)
    print(c / total)
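# Hypothetical sketch of how the per-molecule counters used above could be
# accumulated (the actual accumulation happens earlier in this script):
#
#   from collections import Counter
#   atom_counter = Counter()        # atom-type index -> total occurrences
#   edge_type_counter = Counter()   # edge-type index -> total occurrences
#   atom_per_molecule = []          # one atom-count vector per molecule
#   edge_type_per_molecule = []     # one edge-type-count vector per molecule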
        img_k = img_k[shuffle_idx]
        k = model_k(img_k)  # N x C
        k = k[reverse_idx].detach()  # un-shuffle; no gradient flows to the key encoder

        # update dictionary
        queue = enqueue(queue, k) if queue is not None else k
        queue = dequeue(queue)

    return {'loss': losses.avg, 'pred': pred_meter.avg}, queue


if __name__ == '__main__':
    args = parse_option()

    image_size, mean, std = dataset_info(name='cifar')
    # image_size = 28
    # mean = [0.1307, ]
    # std = [0.3081, ]
    # normalize = transforms.Normalize(mean=mean, std=std)
    train_transform = get_transform(image_size, mean=mean, std=std, mode='train')

    # datasets.mnist.MNIST
    train_dataset = custom_dataset(datasets.cifar.CIFAR10)(root='./',
                                                           train=True,
                                                           transform=train_transform,
                                                           download=True)
    print(len(train_dataset))
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config.BATCH_SIZE,
                                  shuffle=True,
                                  num_workers=0,
                                  pin_memory=False,
                                  drop_last=True)  # drop the last batch due to its irregular size
    model_q, model_k = get_model(config.MODEL)
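# enqueue/dequeue are not defined in this fragment. A minimal sketch of the
# usual MoCo-style FIFO key queue, assuming keys of shape N x C and a queue
# capacity config.K (hypothetical name):
#
#   import torch
#
#   def enqueue(queue, k):
#       # append the newest keys to the end of the queue
#       return torch.cat([queue, k], dim=0)
#
#   def dequeue(queue, max_size=config.K):
#       # drop the oldest keys once the queue exceeds its capacity
#       return queue[-max_size:] if queue.shape[0] > max_size else queue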
def __init__(self, args):
    self.args = args

    # Collect argument things:
    data_dir = ''
    if '--data_dir' in args and args['--data_dir'] is not None:
        data_dir = args['--data_dir']
    self.data_dir = data_dir

    # Collect parameters:
    params = self.default_params()
    config_file = args.get('--config-file')
    if config_file is not None:
        with open(config_file, 'r') as f:
            params.update(json.load(f))
    config = args.get('--config')
    if config is not None:
        params.update(json.loads(config))
    self.params = params

    # Adjust parameter values for the selected mode
    if self.params['generation'] in (2, 3):  # 2 = reconstruction, 3 = testing
        self.params['try_different_starting'] = False
        self.params['use_argmax_nodes'] = True
        self.params['use_argmax_bonds'] = True

    # Use only the CPU
    if not self.params['use_gpu']:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # Get which dataset is in use
    self.params['dataset'] = dataset = args.get('--dataset')
    # Number of atom types in this dataset
    self.params['num_symbols'] = len(dataset_info(dataset)["atom_types"])

    suff = "_" + self.params['suffix'] if self.params['suffix'] is not None else ""
    self.run_id = "_".join([time.strftime("%Y-%m-%d-%H-%M-%S"), str(os.getpid())])
    log_dir = self.params['log_dir']
    self.log_file = os.path.join(log_dir, "%s_log_%s%s.json" % (self.run_id, dataset, suff))
    self.best_model_file = os.path.join(log_dir, "%s_model%s.pickle" % (self.run_id, suff))
    with open(os.path.join(log_dir, "%s_params_%s%s.json" % (self.run_id, dataset, suff)), "w") as f:
        json.dump(params, f)
    print("Run %s starting with following parameters:\n%s" % (self.run_id, json.dumps(self.params)))

    # Set random seeds
    random.seed(params['random_seed'])
    np.random.seed(params['random_seed'])

    # Load data (the first return value is the raw data, kept for the
    # histogram preparation below):
    self.max_num_vertices = 0
    self.num_edge_types = 0
    self.annotation_size = 0
    if self.params['generation'] == 0:  # training
        train_data, self.train_data = self.load_data(params['train_file'], is_training_data=True)
    else:
        train_data, self.train_data = self.load_data(params['train_file'], is_training_data=False)
    valid_data, self.valid_data = self.load_data(params['valid_file'], is_training_data=False)
    test_data, self.test_data = self.load_data(params['test_file'], is_training_data=False)

    # Valence histograms
    self.histograms = dict()
    self.histograms['hist_dim'] = utils.dataset_info(self.params['dataset'])['hist_dim']
    self.histograms['max_valence'] = utils.dataset_info(self.params['dataset'])['max_valence_value']
    self.max_num_vertices = dataset_info(dataset)["max_n_atoms"]
    self.histograms['train'] = self.prepareHist(train_data)
    # A = number of atoms in a molecule, N = number of histograms.
    # v_filter builds a list of max(A) lists; each inner list holds the
    # weights for every histogram, grouped by number of atoms.
    # Index 0 of the train histograms holds the frequencies, index 1 the
    # probabilities.
    self.histograms['filter'] = HM.v_filter(self.histograms['train'][0],
                                            self.histograms['train'][1],
                                            self.max_num_vertices)
    self.histograms['valid'] = self.prepareHist(valid_data)
    self.histograms['test'] = self.prepareHist(test_data)
    # print(self.histograms['filter'])

    # Build the actual model
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph, config=config)
    with self.graph.as_default():
        tf.set_random_seed(params['random_seed'])
        self.placeholders = {}
        self.weights = {}
        self.ops = {}
        self.make_model()
        self.make_train_step()

        # Restore/initialize variables:
        restore_file = args.get('--restore')
        if restore_file is not None:
            self.restore_model(restore_file)
        else:
            self.initialize_model()
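# Hypothetical usage sketch: the 'generation' parameter selects the mode
# handled above (0 = training per the data-loading branch; 2 =
# reconstruction and 3 = testing per the comments). Class name and values
# are assumptions:
#
#   model = MolGenModel({
#       '--dataset': 'zinc',
#       '--data_dir': 'data/',
#       '--config-file': None,
#       '--config': '{"generation": 2, "use_gpu": false}',
#       '--restore': 'logs/run_model.pickle',
#   })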