Example #1
0
def main():
	"""Command-line entry point for test set management.

	Connects to the database, parses the command line (a dataset directory
	name plus one sub-command) and runs the matching operation on the
	dataset.

	The ``nw`` sub-command creates and saves a new ``testset.TestSet`` and
	exits. Every other sub-command first loads the serialised set from the
	dataset directory, offering to build one from the database when the
	directory exists but no serialised set can be read.

	Validation failures (trailing slash, missing dataset directory, target
	directory already present, too few conversion values) print a message and
	exit with status 1. A missing dataset name is reported through
	``parser.error``.

	Output uses single-argument, parenthesised ``print`` so the statements
	behave the same under Python 2 and also parse under Python 3.
	"""

	client, db = mongo.connect()

	parser = argparse.ArgumentParser(description='TEST SET MANAGEMENT')
	parser.add_argument('dataset', help='the name of the test set', nargs='?')
	# NOTE(review): args.delete is never read below; deletion is triggered by
	# the 'delete' sub-command. Kept so existing invocations still parse.
	parser.add_argument('-d', '--delete', help='Removes the spectrum from the lists of spectra in the database')
	parser.set_defaults(which='main')

	subparser = parser.add_subparsers(help='TEST SET COMMANDS')

	delete = subparser.add_parser('delete', help="Removes the spectrum from the lists of spectra in the database")
	delete.set_defaults(which='del')

	insert = subparser.add_parser('insert', help="Inserts processed spectra into the database")
	insert.set_defaults(which='insert')

	cp = subparser.add_parser('cp', help='COPY')
	cp.add_argument('name', help='The new testset name')
	cp.add_argument('-f', '--full', action='store_true', help='Copy all files in dir')
	cp.add_argument('-s', '--success', action='store_true', help='Copy only successfully optimised molecules')
	cp.set_defaults(which='cp')

	nw = subparser.add_parser('nw', help='NEW')
	nw.set_defaults(which='nw')

	rb = subparser.add_parser('rebuild', help='REBUILD')
	rb.set_defaults(which='rebuild')

	minimise = subparser.add_parser('mini', help='MINIMISATION OPERATIONS')
	minimise.add_argument('-c', '--conformers', type=int, help='Set the number of conformers to try')
	minimise.add_argument('-r', '--rdkit', action='store_true', help='Minimise with rdkit mmff')
	minimise.set_defaults(which='minimise')

	convert = subparser.add_parser('convert', help='CONVERSION OPERATIONS')
	convert.add_argument('-a', '--arguments', nargs='+', default=[], help='Supply the conversion arguments')
	convert.add_argument('-s', '--savecpm', help='Location to save the com file')
	# convert.add_argument('convert', help='Convert to com file, default save as g_safename in same dir')
	convert.set_defaults(which='convert')

	molecules = subparser.add_parser('molecule', help='MOLECULE OPERATIONS')
	molecules.add_argument('-a', '--add', metavar='<FILE>', type=argparse.FileType('r', 0), help='Add molecules from file to the test set')
	molecules.add_argument('-r', '--remove', nargs='+', help='Remove named molecules, or all')
	molecules.add_argument('-n', '--num', action='store_true', help='How many molecules in the dataset')
	molecules.add_argument('-c', '--convert', action='store_true', help='Convert to com file')
	molecules.add_argument('--molfiles', default=False, help='Dump to molfiles, with no hydrgens (for Chemdraw)')
	molecules.set_defaults(which='molecules')

	tensors = subparser.add_parser('tensor', help='TENSOR OPERATIONS')
	tensors.add_argument('-p', '--process', action='store_true', help='Process the tensors from the dataset')
	tensors.add_argument('-o', '--output', help='Writes a tensor analysis to csv')
	tensors.set_defaults(which='tensors')

	printer = subparser.add_parser('print', help='PRINT OPERATIONS')
	printer.add_argument('pprint', metavar='<ATTRIBUTE>', help='Print a dataset attribute by name')
	printer.set_defaults(which='printing')

	args = parser.parse_args()

	# The positional is optional (nargs='?'), so it can be None; every code
	# path below needs a name, so stop here with a usage message instead of
	# failing later with an AttributeError.
	if args.dataset is None:
		parser.error('a dataset name is required')

	if args.dataset.endswith('/'):
		print('No trailing slash!')
		sys.exit(1)

	# 'nw' is the only command that may run without an existing dataset.
	if args.which == 'nw':
		print('Creating new dataset directory: ' + args.dataset)

		if os.path.isdir(args.dataset):
			print('Directory already exists!')
			sys.exit(1)

		dataset = testset.TestSet(args.dataset)
		print('Serialising...')
		dataset.save()
		sys.exit(0)

	if not os.path.exists(args.dataset):
		print("dataset directory does not exist")
		sys.exit(1)

	try:
		dataset = testset.TestSet.deserialise(args.dataset)
	except IOError:
		print('This dataset has a filestructure but no serialised set found')
		answer = raw_input("Would you like to create one? y/n ")

		if answer != 'y':
			return

		dataset = testset.TestSet.build(db, args.dataset)
		print('Dataset {0} created with {1} molecules'.format(dataset.dataset, len(dataset.molecules)))

	# Dispatch on the sub-command marker set through set_defaults(which=...).
	if args.which == 'del':
		mongo.delete_spectrum(db, dataset.dataset)

	elif args.which == 'printing':

		# '<attribute>.<sub>' prints <sub> of every item when <attribute> is a
		# list. partition() leaves sub_name empty when there is no dot and
		# never raises when there is more than one.
		name, _, sub_name = args.pprint.partition('.')

		if hasattr(dataset, name):
			attr = getattr(dataset, name)

			if isinstance(attr, list):
				for item in attr:
					if sub_name and hasattr(item, sub_name):
						print(getattr(item, sub_name))
					else:
						print(item)
			else:
				print(attr)

		else:
			print('No attribute with name "{0}" found'.format(name))

	elif args.which == 'molecules':

		if args.add:
			print('Reading molecules from {0}'.format(args.add.name))
			dataset.import_molecules(db, args.add)
			dataset.save()

		elif args.remove:

			if args.remove == ['all']:
				answer = raw_input("""Are you sure you wish to remove all molecules from this dataset? All directories will be removed - y/n: """)
				if answer == 'y':
					dataset.remove_molecules(dataset.molecules)
					dataset.save()
				else:
					sys.exit(0)
			else:
				print('Removing molecules: '+str(args.remove))
				# NOTE(review): unlike the 'all' branch, no dataset.save()
				# follows here - confirm whether that is intentional.
				dataset.remove_molecules(args.remove)

		elif args.num:
			print('{0} molecules in the dataset'.format(len(dataset.molecules)))

		elif args.molfiles:

			# The target directory is resolved against the working directory.
			location = os.path.join(os.getcwd(), args.molfiles)

			for m in dataset.molecules:
				m.mol_to_molfile(dir=location, hydrogens=False)

	elif args.which == 'convert':

		if args.arguments:
			# The values are positional: cpus, memory, command.
			if len(args.arguments) < 3:
				print('convert -a expects three values: <cpus> <memory> <command>')
				sys.exit(1)
			params = {'cpus':args.arguments[0], 'memory':args.arguments[1], 'command':args.arguments[2]}
		else:
			params = molecule.input_gaussian_params()

		dataset.prepare_com_files(**params)

	elif args.which == 'tensors':

		if args.process:
			dataset.process_logs()

		elif args.output:
			# Write the tensors to file, echoing each row to stdout.
			stream = tensor_analysis.TensorStream.build(db, args.dataset)
			stream.scale_stream()

			with open(args.output, 'w') as handle:
				handle.write('Output from dataset {} on {}\n'.format(args.dataset, str(dt.now())))
				handle.write('molecule, atomid, hybridisation, mulliken charge, exp, calc, scaled error\n')
				for line in stream:
					print(line)
					handle.write('{}\n'.format(','.join(map(str,line))))

				print('Wrote output to {}'.format(args.output))

	elif args.which == 'cp':

		if os.path.exists(args.name):
			print('This dir exists, please choose another name')
			sys.exit(1)

		print('Duplicating {0} to {1}'.format(dataset.dataset, args.name))
		dataset = dataset.duplicate_set(args.name, args.full, args.success)
		dataset.save()

	elif args.which == 'minimise':

		if args.conformers:

			print('Scanning conformer space with {0} starting points'.format(args.conformers))
			dataset.minimise_molecules(molecule.m_rdkit_conformers, confs=args.conformers)
			dataset.save()

		elif args.rdkit:

			print('Minimising with RDKIT')
			dataset.minimise_molecules(molecule.m_rdkit)
			dataset.save()

	elif args.which == 'rebuild':

		print('Rebuilding dataset')
		newset = dataset.build(db, dataset.dataset)
		newset.save()

	elif args.which == 'insert':

		print('Inserting tensors')
		dataset.insert_tensors(db)
Example #2
0
def main():
	"""Command-line entry point for single-molecule utilities.

	Connects to the database, parses the command line (a molecule name plus
	one sub-command), pulls the record whose ``g_safename`` equals that name,
	and runs the selected operation:

	* ``plot``    - scatter plot of the tensor data for one dataset.
	* ``convert`` - write a com file for the molecule, optionally renaming it.
	* ``spectra`` - print the 13C spectra side by side, then regression
	  statistics for each experimental/computed pair, optionally saving a
	  figure for each computed spectrum.

	A missing molecule name is reported through ``parser.error``.

	Output uses single-argument, parenthesised ``print`` so the statements
	behave the same under Python 2 and also parse under Python 3.
	"""

	client, db = mongo.connect()

	parser = argparse.ArgumentParser(description='UTILITIES')
	parser.add_argument('molecule', help='the name of the molecule/molecues', nargs='?')
	parser.set_defaults(which='main')

	subparser = parser.add_subparsers(help='TEST SET COMMANDS')

	plotter = subparser.add_parser('plot', help='PLOTTING OPERATIONS')
	plotter.add_argument('-d', '--dataset', required=True, help='Dataset to plot')
	plotter.add_argument('-s', '--savefig', help='Location to save the figure')
	plotter.set_defaults(which='plotting')

	converter = subparser.add_parser('convert', help='CONVERSION OPERATIONS')
	converter.add_argument('-s', '--save', help='Name the com file')
	converter.set_defaults(which='convert')

	spectra = subparser.add_parser('spectra', help='SPECTRA GRAB')
	spectra.add_argument('-s', '--savefig', help='Location to save the figure')
	spectra.add_argument('-d', '--dataset', help='dataset to examine, otherwise all', default=None)
	spectra.set_defaults(which='spectra')

	args = parser.parse_args()

	# The positional is optional (nargs='?'); without it the record lookup
	# below would query for g_safename=None, so fail early with usage help.
	if args.molecule is None:
		parser.error('a molecule name is required')

	mol = molecule.Molecule(os.getcwd()).pull_record(db, {'g_safename':args.molecule})

	if args.which == 'plotting':

		# An unset or empty --savefig is passed on as None.
		loc = args.savefig or None

		main_title = mol.g_safename + ' ' + args.dataset

		shift_sets = tensor_analysis.molecule_tensor_analysis(mol, args.dataset)
		exp, calc, names = tensor_analysis.extract_data(shift_sets)

		fig = plotting.Figure(dimensions=(12,5), cols=1, main_title=main_title)
		plotting.plot_tensor_scatter(calc, exp, fig, loc, legend=False)

		fig.show_figure()

		# plotting.plot_tensors(tensors, title=args.dataset, loc=loc)

	elif args.which == 'convert':

		params = molecule.input_gaussian_params()
		molecule.convert_to_com(mol, **params)
		mol.cleanup()

		if args.save:

			if not args.save.endswith('.com'):
				args.save += '.com'

			# The file at <cwd>/<g_safename>/<g_safename>.com is renamed
			# within the same directory.
			old = os.path.join(os.getcwd(), mol.g_safename, mol.g_safename+'.com')
			new = os.path.join(os.path.dirname(old), args.save)

			os.rename(old, new)

	elif args.which == 'spectra':

		shift_list = []

		for spectrum in mol.get_spectra(nuclei='13C'):

			# Spectra that carry a 'dataset' attribute are labelled "(C)",
			# the rest "(E)" with their assignment method.
			if hasattr(spectrum, 'dataset'):
				name = "(C) " + spectrum.dataset
			else:
				name = "(E) " + str(spectrum.assignment_method)

			# Column for this spectrum: label first, then shifts sorted by
			# their first element.
			shifts = sorted(spectrum.shifts, key=lambda y: y[0])
			shifts.insert(0, name)

			shift_list.append(shifts)

		print('\n')

		# Print the columns in groups of five; zip() stops at the shortest
		# column of each group.
		for sl in sublist(shift_list, 5):
			for line in zip(*sl):
				# print shifts
				print(''.join(['{:<30}'.format(x) for x in line]))
			print('\n')

		exp_spectra = mol.get_spectra(computed=False, nuclei='13C')

		if args.dataset:
			comp_spectra = mol.get_spectra(computed=True, dataset=args.dataset, assignment_method='Computed')
		else:
			comp_spectra = mol.get_spectra(computed=True, assignment_method='Computed')

		if comp_spectra:
			print('{:<30}{:<25}{:<20}{:<20}{:<20}'.format('Experimental', 'Computed', 'slope', 'intercept', 'R'))

			for exp_spec in exp_spectra:
				for comp_spec in comp_spectra:

					stats = tensor_analysis.regres_stats(comp_spec, exp_spec)
					print('{:<30}{:<25}{:<20}{:<20}{:<20}'.format(exp_spec.assignment_method, comp_spec.dataset, round(stats[0],4),
						round(stats[1],4), round(stats[2],4)))

					if args.savefig:
						# The original referenced an undefined name 'saveto'
						# here; the save location is the --savefig value.
						# NOTE(review): the same path is used for every pair,
						# so later figures presumably overwrite earlier ones.
						print('Saving figure to {0}'.format(args.savefig))
						t = tensor_analysis.molecule_tensor_analysis(mol, comp_spec.dataset)
						plotting.plot_tensors(t, title=mol.g_safename, loc=args.savefig)