def _make_progress_bar(label, maxval):
    """Create and start a console progress bar prefixed with ``label``."""
    widgets = [label, Percentage(), ' ', Bar(marker=RotatingMarker()), ' ',
               ETA(), ' ', FileTransferSpeed()]
    return ProgressBar(widgets=widgets, maxval=maxval).start()


def _load_blanks(blank_paths):
    """Group the blank CEF files by mode and combine each mode into one run.

    Returns the dictionary produced by ``group_cefs`` with every value
    replaced by the result of ``btools.comb_runs`` for that mode.
    """
    blanks = group_cefs(*blank_paths)
    for mode in blanks:
        parsed = [cefparse.read_cef(path) for path in blanks[mode]]
        # Note (from the original author): the compound information in
        # compressed runs is not very reliable; the adduct information is
        # what is preserved.
        blanks[mode] = btools.comb_runs(20, 0.4, *parsed)
    return blanks


def main(args):
    """Process groups of CEF files in parallel and store the results.

    Steps, in order:

    1. Parse ``args`` with ``parse_args`` and open a database session.
    2. Read the blank CEF files and combine them into one run per mode.
    3. Group the sample CEF files by mode and submit one ``process`` job per
       group (one file from every mode) to a local ``pp`` job server.
    4. Add each job result to the session and commit it.

    Groups containing fewer than four files are reported and skipped.
    If no CEF files are grouped at all, the function returns without
    starting the job server.

    Args:
        args: Command line arguments, passed straight to ``parse_args``.
    """
    options = parse_args(args)

    # Open up connection to database
    engine = dbtables.connect(options.db)
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        blanks = _load_blanks(options.blanks)

        ceflists = group_cefs(*options.cefs)
        # Materialise keys and values once so they stay aligned and can be
        # indexed on both Python 2 and Python 3.
        modes = list(ceflists.keys())
        cef_columns = list(ceflists.values())
        if not cef_columns:
            # Nothing to process; the original code raised IndexError here.
            return

        # This is for the parallelization: an empty server tuple means only
        # use this computer, no network cluster.
        ppservers = ()
        if options.cpus:
            job_server = pp.Server(options.cpus, ppservers=ppservers,
                                   secret='acetone')
        else:
            job_server = pp.Server(ppservers=ppservers, secret='acetone')
        # Single pre-formatted strings keep the output identical to the old
        # Python 2 print statements while also being valid Python 3.
        print("Running with  %s  CPU's" % job_server.get_ncpus())

        runs = []
        pbar = _make_progress_bar('Submitting Jobs: ', len(cef_columns[0]))
        for ind, group in enumerate(zip(*cef_columns)):
            if len(group) < 4:
                print("Skipping:  %s" % (group,))
                continue
            small = dict(zip(modes, group))
            # Submit the group as a job to parallel process
            print(group)
            runs.append(job_server.submit(
                process,
                (small, blanks, options.cutoff, options.rt,),
                (),
                ("btools", "sys", "re", "cefparse", "sqlalchemy.orm",
                 "sqlalchemy.ext.declarative", "dbtables", "val",)))
            pbar.update(ind + 1)
        pbar.finish()

        pbar = _make_progress_bar('COMMITING: ', len(runs))
        for ind, run in enumerate(runs):
            # Calling the job object yields its result; each result is
            # committed on its own so earlier results are already stored
            # if a later job fails.
            session.add(run())
            session.commit()
            pbar.update(ind + 1)
        pbar.finish()
    finally:
        # Always release the database connection, even on failure.
        session.close()
def process(run_cefs, blank_cefs, cutoff, rtcut):
    """Blank-subtract one group of runs and validate them together.

    Each file in ``run_cefs`` is parsed with ``cefparse.read_cef`` and the
    parsed run replaces the file name in ``run_cefs`` (the dictionary is
    updated in place). The blank for the same mode is then removed from it
    with ``btools.rmblank``. Finally all parsed runs are handed to
    ``val.validate`` together with the two cutoffs.

    According to the original author, the downstream steps remove saturated
    peaks, validate peaks between the 4GHz and 2GHz modes and combine
    positive and negative data; that work presumably happens inside
    ``val.validate`` (not visible here).

    Args:
        run_cefs: Dictionary mapping mode to CEF file name. Modes must match
            the keys of ``blank_cefs``; the expected keys are pos_2g,
            pos_4g, neg_2g and neg_4g.
        blank_cefs: Dictionary mapping mode to the blank (noise) run object
            for that mode.
        cutoff: Passed through to ``val.validate`` as its first argument.
        rtcut: Passed through to ``val.validate`` as its second argument.

    Returns:
        Whatever ``val.validate`` returns for the blank-subtracted runs.
    """
    for mode, cef_path in run_cefs.items():
        parsed_run = cefparse.read_cef(cef_path)
        # Only the value is replaced, never the key set, so updating the
        # dictionary while walking it is safe.
        run_cefs[mode] = parsed_run
        btools.rmblank(parsed_run, blank_cefs[mode])

    cleaned_runs = run_cefs.values()
    return val.validate(cutoff, rtcut, *cleaned_runs)