def _multiparse(filepath, metalist, win=None):
    """Parse a single mzML/imzML file and append its metadata to *metalist*.

    The PSI-MS and imagingMS ontologies are loaded inside the function on
    every call, from the ``.obo`` files located next to this module.

    :param str filepath: path of the file to parse; the text after the last
        ``os.path.extsep`` selects the parser (``mzML`` or ``imzML``)
    :param metalist: list-like object exposing ``append`` that receives the
        parsed ``meta`` attribute (e.g. a ``Manager().list()`` proxy)
    :param win: unused; kept so existing three-argument callers keep
        working.  Defaults to ``None`` so the function can also be started
        with only ``(filepath, metalist)``.
    :raises KeyError: if the file extension is neither ``mzML`` nor ``imzML``
    """
    dirname = os.path.dirname(os.path.realpath(__file__))

    # Ontology loading is skipped when a help/version flag is on the
    # command line.
    if any(flag in sys.argv for flag in ('-h', '--help', '--version')):
        _ms, _ims = None, None
    else:
        _ms = Ontology(os.path.join(dirname, "psi-ms.obo"), False)
        _ims = Ontology(os.path.join(dirname, "imagingMS.obo"), False)
        _ims.terms.update(_ms.terms)

    # Only merge when the ontologies were actually loaded; calling
    # ``merge`` on ``None`` raised AttributeError.
    if _ims is not None:
        _ims.merge(_ms)

    PARSERS = {'mzML': mzml.mzMLmeta, 'imzML': mzml.imzMLmeta}
    ONTOLOGIES = {'mzML': _ms, 'imzML': _ims}

    print('Parsing file: {}'.format(filepath))

    # Compute the extension once and use it for both lookups.
    ext = filepath.split(os.path.extsep)[-1]
    parser = PARSERS[ext]
    ont = ONTOLOGIES[ext]

    metalist.append(parser(filepath, ont).meta)
def full_parse(in_dir, out_dir, study_identifier, usermeta=None, split=True,
               merge=False, verbose=False, multip=False):
    """ Parses every study from *in_dir* and then creates ISA files.

    A new folder is created in the out directory bearing the name of
    the study identifier.

    *in_dir* may be a directory (searched with the glob ``*mzML``) or a
    tar/zip archive, which is read through ``compr_extract``.  A
    ``UserWarning`` is issued when no file is found.

    :param str in_dir: path to directory containing studies
    :param str out_dir: path to out directory
    :param str study_identifier: name of the study (directory to create)
    :param dict usermeta: user metadata handed to ``isa.ISA_Tab``; an
        empty dict is used when falsy
    :param bool split: forwarded unchanged to ``ISA_Tab.write``
    :param bool merge: when true and the first file is an ``imzML``, the
        collected metadata goes through ``merge_spectra`` before writing
    :param bool verbose: print progress messages instead of drawing a
        progress bar
    :param multip: when truthy, files found in a directory are parsed in
        separate processes, one process per file
    """
    # NOTE(review): this file contains a later definition of ``full_parse``
    # that rebinds the name at import time -- confirm which one is intended.
    dirname = os.path.dirname(os.path.realpath(__file__))

    # Ontology loading is skipped when a help/version flag is on the
    # command line.
    if any(flag in sys.argv for flag in ('-h', '--help', '--version')):
        ms, ims = None, None
    else:
        ms = Ontology(os.path.join(dirname, "psi-ms.obo"), False)
        ims = Ontology(os.path.join(dirname, "imagingMS.obo"), False)
        ims.terms.update(ms.terms)

    # Only merge when the ontologies were actually loaded; calling
    # ``merge`` on ``None`` raised AttributeError.
    if ims is not None:
        ims.merge(ms)

    PARSERS = {'mzML': mzml.mzMLmeta, 'imzML': mzml.imzMLmeta}
    ONTOLOGIES = {'mzML': ms, 'imzML': ims}

    # Collect the input files: archive members or paths from a directory.
    if os.path.isfile(in_dir) and tarfile.is_tarfile(in_dir):
        compr = True
        mzml_files = compr_extract(in_dir, "tar")
    elif os.path.isfile(in_dir) and zipfile.is_zipfile(in_dir):
        compr = True
        mzml_files = compr_extract(in_dir, "zip")
    else:
        compr = False
        mzml_path = os.path.join(in_dir, "*mzML")
        if verbose:
            print(mzml_path)
        mzml_files = glob.glob(mzml_path)

    if not mzml_files:
        warnings.warn("No files were found in {}.".format(in_dir), UserWarning)
        return

    def extension_of(entry):
        # Archive members are objects exposing ``.name``; directory
        # entries are plain path strings.
        name = entry.name if compr else entry
        return name.split(os.path.extsep)[-1]

    # The first file's extension decides the merge step and the writer.
    ext1 = extension_of(mzml_files[0])

    if multip and not compr:
        # ``_multiparse`` splits its argument as a path string, so archive
        # members are parsed sequentially below instead.
        manager = Manager()
        try:
            shared = manager.list()
            # The third argument fills ``_multiparse``'s unused ``win``
            # parameter.
            jobs = [Process(target=_multiparse, args=(path, shared, None))
                    for path in mzml_files]
            for job in jobs:
                job.start()
            for job in jobs:
                job.join()
            # Copy out of the proxy before the manager goes away so later
            # steps work on a plain list.
            metalist = list(shared)
        finally:
            manager.shutdown()
    else:
        if verbose:
            entries = mzml_files
        else:
            pbar = pb.ProgressBar(widgets=[
                'Parsing {:8}: '.format(study_identifier),
                pb.FormatLabel('%(value)4d'), '/',
                '%4d' % len(mzml_files),
                pb.Bar(marker=MARKER, left=" |", right="| "),
                pb.ETA()
            ])
            entries = pbar(mzml_files)

        metalist = []
        for entry in entries:
            if verbose:
                print("Parsing file: {}".format(entry))
            ext = extension_of(entry)
            metalist.append(PARSERS[ext](entry, ONTOLOGIES[ext]).meta)

    # update isa-tab file
    if merge and ext1 == 'imzML':
        if verbose:
            print('Attempting to merge profile and centroid scans')
        metalist = merge_spectra(metalist)

    if metalist:
        if verbose:
            print("Parsing mzML meta information into ISA-Tab structure")
        isa.ISA_Tab(out_dir, study_identifier, usermeta or {}).write(
            metalist, ext1, split)
def full_parse(in_dir, out_dir, study_identifier, usermeta=None, split=True,
               merge=False, verbose=False, multip=False):
    """ Parses every study from *in_dir* and then creates ISA files.

    A new folder is created in the out directory bearing the name of
    the study identifier.

    *in_dir* may be a directory (searched with the glob ``*mzML``) or a
    tar/zip archive, which is read through ``compr_extract``.  A
    ``UserWarning`` is issued when no file is found.

    :param str in_dir: path to directory containing studies
    :param str out_dir: path to out directory
    :param str study_identifier: name of the study (directory to create)
    :param dict usermeta: user metadata handed to ``isa.ISA_Tab``; an
        empty dict is used when falsy
    :param bool split: forwarded unchanged to ``ISA_Tab.write``
    :param bool merge: when true and the first file is an ``imzML``, the
        collected metadata goes through ``merge_spectra`` before writing
    :param bool verbose: print progress messages instead of drawing a
        progress bar
    :param multip: when truthy, files found in a directory are parsed in
        separate processes, one process per file
    """
    # NOTE(review): an earlier definition of ``full_parse`` exists in this
    # file; being later, this one is the binding left after import --
    # consider removing the duplicate.

    # Imported locally because this block did not previously use Manager.
    from multiprocessing import Manager

    dirname = os.path.dirname(os.path.realpath(__file__))

    # Ontology loading is skipped when a help/version flag is on the
    # command line.
    if any(flag in sys.argv for flag in ('-h', '--help', '--version')):
        _ms, _ims = None, None
    else:
        _ms = Ontology(os.path.join(dirname, "psi-ms.obo"), False)
        _ims = Ontology(os.path.join(dirname, "imagingMS.obo"), False)
        _ims.terms.update(_ms.terms)

    # Only merge when the ontologies were actually loaded; calling
    # ``merge`` on ``None`` raised AttributeError.
    if _ims is not None:
        _ims.merge(_ms)

    _PARSERS = {'mzML': mzml.mzMLmeta, 'imzML': mzml.imzMLmeta}
    _ONTOLOGIES = {'mzML': _ms, 'imzML': _ims}

    # Collect the input files: archive members or paths from a directory.
    if os.path.isfile(in_dir) and tarfile.is_tarfile(in_dir):
        compr = True
        mzml_files = compr_extract(in_dir, "tar")
    elif os.path.isfile(in_dir) and zipfile.is_zipfile(in_dir):
        compr = True
        mzml_files = compr_extract(in_dir, "zip")
    else:
        compr = False
        mzml_path = os.path.join(in_dir, "*mzML")
        if verbose:
            print(mzml_path)
        mzml_files = glob.glob(mzml_path)

    if not mzml_files:
        warnings.warn("No files were found in {}.".format(in_dir), UserWarning)
        return

    # Archive members are objects exposing ``.name``; directory entries are
    # plain path strings.  The first file's extension decides the merge
    # step and the writer, so it is known even when parsing runs in workers.
    first_name = mzml_files[0].name if compr else mzml_files[0]
    ext1 = first_name.split(os.path.extsep)[-1]

    metalist = []

    if multip and not compr:
        # ``_multiparse`` splits its argument as a path string, so archive
        # members are parsed sequentially below instead.
        manager = Manager()
        try:
            shared = manager.list()
            jobs = []
            for path in mzml_files:
                # The third argument fills ``_multiparse``'s unused ``win``
                # parameter.
                worker = Process(target=_multiparse,
                                 args=(path, shared, None))
                jobs.append(worker)
                worker.start()
            # Wait for every worker before reading the shared list.
            for worker in jobs:
                worker.join()
            metalist = list(shared)
        finally:
            manager.shutdown()

    elif not verbose:
        pbar = pb.ProgressBar(widgets=[
            'Parsing {:8}: '.format(study_identifier),
            pb.FormatLabel('%(value)4d'), '/',
            '%4d' % len(mzml_files),
            pb.Bar(marker=MARKER, left=" |", right="| "),
            pb.ETA()
        ])
        for item in pbar(mzml_files):
            name = item.name if compr else item
            ext = name.split(os.path.extsep)[-1]
            metalist.append(_PARSERS[ext](item, _ONTOLOGIES[ext]).meta)

    else:
        for item in mzml_files:
            print("Parsing file: {}".format(item))
            name = item.name if compr else item
            ext = name.split(os.path.extsep)[-1]
            metalist.append(_PARSERS[ext](item, _ONTOLOGIES[ext]).meta)

    # update isa-tab file
    if merge and ext1 == 'imzML':
        if verbose:
            print('Attempting to merge profile and centroid scans')
        metalist = merge_spectra(metalist)

    if metalist:
        if verbose:
            print("Parsing mzML meta information into ISA-Tab structure")
        isa.ISA_Tab(out_dir, study_identifier, usermeta or {}).write(
            metalist, ext1, split)