def _build_testset_parser():
    """Build the command-line parser for test set management.

    Every sub-command records an identifier in ``args.which`` through
    ``set_defaults``; ``main`` dispatches on that value.
    """
    parser = argparse.ArgumentParser(description='TEST SET MANAGEMENT')
    parser.add_argument('dataset', help='the name of the test set', nargs='?')
    parser.add_argument(
        '-d', '--delete',
        help='Removes the spectrum from the lists of spectra in the database')
    parser.set_defaults(which='main')
    subparser = parser.add_subparsers(help='TEST SET COMMANDS')

    delete = subparser.add_parser(
        'delete',
        help="Removes the spectrum from the lists of spectra in the database")
    delete.set_defaults(which='del')

    insert = subparser.add_parser(
        'insert', help="Inserts processed spectra into the database")
    insert.set_defaults(which='insert')

    cp = subparser.add_parser('cp', help='COPY')
    cp.add_argument('name', help='The new testset name')
    cp.add_argument('-f', '--full', action='store_true',
                    help='Copy all files in dir')
    cp.add_argument('-s', '--success', action='store_true',
                    help='Copy only successfully optimised molecules')
    cp.set_defaults(which='cp')

    nw = subparser.add_parser('nw', help='NEW')
    nw.set_defaults(which='nw')

    rb = subparser.add_parser('rebuild', help='REBUILD')
    rb.set_defaults(which='rebuild')

    minimise = subparser.add_parser('mini', help='MINIMISATION OPERATIONS')
    minimise.add_argument('-c', '--conformers', type=int,
                          help='Set the number of conformers to try')
    minimise.add_argument('-r', '--rdkit', action='store_true',
                          help='Minimise with rdkit mmff')
    minimise.set_defaults(which='minimise')

    convert = subparser.add_parser('convert', help='CONVERSION OPERATIONS')
    convert.add_argument('-a', '--arguments', nargs='+', default=[],
                         help='Supply the conversion arguments')
    convert.add_argument('-s', '--savecpm',
                         help='Location to save the com file')
    # convert.add_argument('convert', help='Convert to com file, default save as g_safename in same dir')
    convert.set_defaults(which='convert')

    molecules = subparser.add_parser('molecule', help='MOLECULE OPERATIONS')
    molecules.add_argument('-a', '--add', metavar='<FILE>',
                           type=argparse.FileType('r', 0),
                           help='Add molecules from file to the test set')
    molecules.add_argument('-r', '--remove', nargs='+',
                           help='Remove named molecules, or all')
    molecules.add_argument('-n', '--num', action='store_true',
                           help='How many molecules in the dataset')
    molecules.add_argument('-c', '--convert', action='store_true',
                           help='Convert to com file')
    molecules.add_argument(
        '--molfiles', default=False,
        help='Dump to molfiles, with no hydrgens (for Chemdraw)')
    molecules.set_defaults(which='molecules')

    tensors = subparser.add_parser('tensor', help='TENSOR OPERATIONS')
    tensors.add_argument('-p', '--process', action='store_true',
                         help='Process the tensors from the dataset')
    tensors.add_argument('-o', '--output',
                         help='Writes a tensor analysis to csv')
    tensors.set_defaults(which='tensors')

    printer = subparser.add_parser('print', help='PRINT OPERATIONS')
    printer.add_argument('pprint', metavar='<ATTRIBUTE>',
                         help='Print a dataset attribute by name')
    printer.set_defaults(which='printing')

    return parser


def _load_testset(args, db):
    """Return the test set named by ``args.dataset``.

    Returns None when no serialised set is found and the user declines to
    build one. Exits the process when the dataset name is missing or ends
    with a slash, when the dataset directory does not exist, and always
    after handling the ``nw`` command.
    """
    # 'dataset' is declared with nargs='?', so it can be None; the original
    # code crashed with AttributeError on None.endswith in that case.
    if args.dataset is None:
        print('No dataset name given')
        sys.exit(1)
    if args.dataset.endswith('/'):
        print('No trailing slash!')
        sys.exit(1)

    if args.which == 'nw':
        print('Creating new dataset directory: ' + args.dataset)
        if os.path.isdir(args.dataset):
            print('Directory already exists!')
            sys.exit(1)
        dataset = testset.TestSet(args.dataset)
        print('Serialising...')
        dataset.save()
        sys.exit(0)

    if not os.path.exists(args.dataset):
        print("dataset directory does not exist")
        sys.exit(1)

    try:
        return testset.TestSet.deserialise(args.dataset)
    except IOError:
        print('This dataset has a filestructure but no serialised set found')

    # raw_input: this script is written for Python 2.
    answer = raw_input("Would you like to create one? y/n ")
    if answer != 'y':
        return None
    dataset = testset.TestSet.build(db, args.dataset)
    print('Dataset {0} created with {1} molecules'.format(
        dataset.dataset, len(dataset.molecules)))
    return dataset


def _print_attribute(dataset, spec):
    """Print the dataset attribute described by ``spec``.

    ``spec`` is either ``name`` or ``name.sub``. When the attribute is a
    list and ``sub`` is given, ``sub`` is printed for every item that has
    it; items without it (or all items when no ``sub`` is given) are
    printed as they are.
    """
    # partition never raises, unlike a two-way unpack of split('.'), and
    # leaves sub_name empty when no dot is present (previously a NameError).
    name, _, sub_name = spec.partition('.')
    if not hasattr(dataset, name):
        print('No attribute with name "{0}" found'.format(name))
        return

    attr = getattr(dataset, name)
    if not isinstance(attr, list):
        print(attr)
        return

    for item in attr:
        if sub_name and hasattr(item, sub_name):
            print(getattr(item, sub_name))
        else:
            print(item)


def _handle_molecules(args, db, dataset):
    """Run the first requested ``molecule`` sub-command option.

    Options are checked in the order add, remove, num, molfiles; only the
    first one that is set is acted on.
    """
    if args.add:
        print('Reading molecules from {0}'.format(args.add.name))
        dataset.import_molecules(db, args.add)
        dataset.save()
    elif args.remove:
        if args.remove == ['all']:
            answer = raw_input(
                "Are you sure you wish to remove all molecules from this "
                "dataset? All directories will be removed - y/n: ")
            if answer != 'y':
                sys.exit(0)
            dataset.remove_molecules(dataset.molecules)
            dataset.save()
        else:
            print('Removing molecules: ' + str(args.remove))
            # NOTE(review): unlike the 'all' path, this path never calls
            # dataset.save() -- confirm whether that is intentional.
            dataset.remove_molecules(args.remove)
    elif args.num:
        print('{0} molecules in the dataset'.format(len(dataset.molecules)))
    elif args.molfiles:
        location = os.path.join(os.getcwd(), args.molfiles)
        for mol in dataset.molecules:
            mol.mol_to_molfile(dir=location, hydrogens=False)


def _write_tensor_report(db, dataset_name, output_path):
    """Write the scaled tensor stream of a dataset to ``output_path``.

    Each line of the stream is echoed to stdout and written to the file as
    a comma-joined row, after two header lines.
    """
    stream = tensor_analysis.TensorStream.build(db, dataset_name)
    stream.scale_stream()
    with open(output_path, 'w') as handle:
        handle.write('Output from dataset {} on {}\n'.format(
            dataset_name, str(dt.now())))
        handle.write('molecule, atomid, hybridisation, mulliken charge, '
                     'exp, calc, scaled error\n')
        for line in stream:
            print(line)
            handle.write('{}\n'.format(','.join(map(str, line))))
    print('Wrote output to {}'.format(output_path))


def main():
    """Command-line entry point for managing a test set.

    Connects to the database, parses the command line, loads (or creates)
    the requested test set and dispatches on the chosen sub-command, which
    is stored in ``args.which``.
    """
    # Connect before parsing, as the original did.
    client, db = mongo.connect()
    args = _build_testset_parser().parse_args()

    dataset = _load_testset(args, db)
    if dataset is None:
        # The user declined to build a serialised set.
        return

    if args.which == 'del':
        mongo.delete_spectrum(db, dataset.dataset)
    elif args.which == 'printing':
        _print_attribute(dataset, args.pprint)
    elif args.which == 'molecules':
        _handle_molecules(args, db, dataset)
    elif args.which == 'convert':
        if args.arguments:
            # The three values are used positionally; fail with a message
            # rather than an IndexError when too few are supplied.
            if len(args.arguments) < 3:
                print('Conversion needs three arguments: cpus memory command')
                sys.exit(1)
            cpus, memory, command = args.arguments[:3]
            params = {'cpus': cpus, 'memory': memory, 'command': command}
        else:
            params = molecule.input_gaussian_params()
        dataset.prepare_com_files(**params)
    elif args.which == 'tensors':
        if args.process:
            dataset.process_logs()
        elif args.output:
            _write_tensor_report(db, args.dataset, args.output)
    elif args.which == 'cp':
        if os.path.exists(args.name):
            print('This dir exists, please choose another name')
            sys.exit(1)
        print('Duplicating {0} to {1}'.format(dataset.dataset, args.name))
        dataset = dataset.duplicate_set(args.name, args.full, args.success)
        dataset.save()
    elif args.which == 'minimise':
        if args.conformers:
            print('Scanning conformer space with {0} starting points'.format(
                args.conformers))
            dataset.minimise_molecules(molecule.m_rdkit_conformers,
                                       confs=args.conformers)
            dataset.save()
        elif args.rdkit:
            print('Minimising with RDKIT')
            dataset.minimise_molecules(molecule.m_rdkit)
            dataset.save()
    elif args.which == 'rebuild':
        print('Rebuilding dataset')
        newset = dataset.build(db, dataset.dataset)
        newset.save()
    elif args.which == 'insert':
        print('Inserting tensors')
        dataset.insert_tensors(db)
def main():
    """Command-line entry point for single-molecule utilities.

    Connects to the database, parses the command line, pulls the record of
    the named molecule and then runs the chosen sub-command (stored in
    ``args.which``): ``plot``, ``convert`` or ``spectra``.
    """
    client, db = mongo.connect()

    parser = argparse.ArgumentParser(description='UTILITIES')
    parser.add_argument('molecule', nargs='?',
                        help='the name of the molecule/molecues')
    parser.set_defaults(which='main')
    subparser = parser.add_subparsers(help='TEST SET COMMANDS')

    plotter = subparser.add_parser('plot', help='PLOTTING OPERATIONS')
    plotter.add_argument('-d', '--dataset', required=True,
                         help='Dataset to plot')
    plotter.add_argument('-s', '--savefig',
                         help='Location to save the figure')
    plotter.set_defaults(which='plotting')

    converter = subparser.add_parser('convert', help='CONVERSION OPERATIONS')
    converter.add_argument('-s', '--save', help='Name the com file')
    converter.set_defaults(which='convert')

    spectra = subparser.add_parser('spectra', help='SPECTRA GRAB')
    spectra.add_argument('-s', '--savefig',
                         help='Location to save the figure')
    spectra.add_argument('-d', '--dataset', default=None,
                         help='dataset to examine, otherwise all')
    spectra.set_defaults(which='spectra')

    args = parser.parse_args()

    mol = molecule.Molecule(os.getcwd()).pull_record(
        db, {'g_safename': args.molecule})

    if args.which == 'plotting':
        # An empty or missing --savefig means "do not save".
        loc = args.savefig or None
        main_title = mol.g_safename + ' ' + args.dataset
        shift_sets = tensor_analysis.molecule_tensor_analysis(
            mol, args.dataset)
        exp, calc, _names = tensor_analysis.extract_data(shift_sets)
        fig = plotting.Figure(dimensions=(12, 5), cols=1,
                              main_title=main_title)
        plotting.plot_tensor_scatter(calc, exp, fig, loc, legend=False)
        fig.show_figure()
        # plotting.plot_tensors(tensors, title=args.dataset, loc=loc)

    elif args.which == 'convert':
        params = molecule.input_gaussian_params()
        molecule.convert_to_com(mol, **params)
        mol.cleanup()
        if args.save:
            target_name = args.save
            if not target_name.endswith('.com'):
                target_name += '.com'
            # Rename <cwd>/<g_safename>/<g_safename>.com within its directory.
            old = os.path.join(os.getcwd(), mol.g_safename,
                               mol.g_safename + '.com')
            new = os.path.join(os.path.dirname(old), target_name)
            os.rename(old, new)

    elif args.which == 'spectra':
        # One column per spectrum: a "(C) <dataset>" or
        # "(E) <assignment_method>" label, then the shifts sorted by their
        # first element.
        shift_list = []
        for spectrum in mol.get_spectra(nuclei='13C'):
            if hasattr(spectrum, 'dataset'):
                label = "(C) " + spectrum.dataset
            else:
                label = "(E) " + str(spectrum.assignment_method)
            ordered = sorted(spectrum.shifts, key=lambda shift: shift[0])
            shift_list.append([label] + ordered)

        print('\n')
        # Print the columns side by side, five spectra per group.
        for group in sublist(shift_list, 5):
            for row in zip(*group):
                print(''.join('{:<30}'.format(cell) for cell in row))
            print('\n')

        exp_spectra = mol.get_spectra(computed=False, nuclei='13C')
        if args.dataset:
            comp_spectra = mol.get_spectra(
                computed=True, dataset=args.dataset,
                assignment_method='Computed')
        else:
            comp_spectra = mol.get_spectra(
                computed=True, assignment_method='Computed')

        if comp_spectra:
            row_format = '{:<30}{:<25}{:<20}{:<20}{:<20}'
            print(row_format.format('Experimental', 'Computed', 'slope',
                                    'intercept', 'R'))
            for exp_spec in exp_spectra:
                for comp_spec in comp_spectra:
                    stats = tensor_analysis.regres_stats(comp_spec, exp_spec)
                    print(row_format.format(
                        exp_spec.assignment_method, comp_spec.dataset,
                        round(stats[0], 4), round(stats[1], 4),
                        round(stats[2], 4)))
                    if args.savefig:
                        # The original referenced an undefined name
                        # 'saveto' here (NameError); the save location is
                        # the --savefig argument.
                        # NOTE(review): every pair writes to the same
                        # location -- confirm this nesting is intended.
                        print('Saving figure to {0}'.format(args.savefig))
                        t = tensor_analysis.molecule_tensor_analysis(
                            mol, comp_spec.dataset)
                        plotting.plot_tensors(t, title=mol.g_safename,
                                              loc=args.savefig)