# ---- Phase II: parse every configured dataset into the parsed.py module ----
print("\n======= Phase II, parsing data =======")
# For now, download and store the data in the parsed.py module. This module
# could be replaced or re-implemented using something like DBM to help with
# memory usage.
interval_time = time.time()
working_dir = os.getcwd()
for _root, _dirs, fnames in os.walk(working_dir):
    for fname in fnames:
        # Only filenames registered in baseline_data have a parser configured.
        if fname not in baseline_data:
            continue
        # PARSER_TYPE indexes the parser class inside the baseline_data tuple;
        # the parser is constructed directly on its downloaded dataset file.
        parser = baseline_data.get(fname)[PARSER_TYPE]("datasets/" + fname)
        if verbose:
            parser.is_verbose()
        parser_name = str(parser)
        print("Running " + parser_name)
        # Feed each parsed record into the shared parsed-data store.
        for record in parser.parse():
            parsed.build_data(record, parser_name)
elapsed_minutes = (time.time() - interval_time) / 60
print("Phase II ran in " + str(elapsed_minutes) + " minutes")

# ---- Phase III: reload the parsed datasets needed to build namespaces ----
print("\n======= Phase III, building namespaces =======")
interval_time = time.time()
# load parsed data to build namespaces
ei = parsed.load_data("entrez_info")
eh = parsed.load_data("entrez_history")
hg = parsed.load_data("hgnc")
mg = parsed.load_data("mgi")
rg = parsed.load_data("rgd")
sp = parsed.load_data("swiss")
af = parsed.load_data("affy")
g2 = parsed.load_data("gene2acc")
chebi = parsed.load_data("chebi")
schem = parsed.load_data("schem")
# Maps '<DataSet name>_data' -> the populated data object, so later phases can
# reach the parsed results without re-reading the pickles.
object_dict = {}
for root, dirs, filenames in os.walk(working_dir):
    for fn in filenames:
        if fn not in baseline_data:
            continue
        try:
            # baseline_data values are tuples: index PARSER_TYPE holds the
            # parser class, index 2 the DataSet object(s) that parser fills.
            data_tuple = baseline_data.get(fn)
            data_object = data_tuple[2]
            parser = data_tuple[PARSER_TYPE]('datasets/' + fn)
            if verbose:
                parser.is_verbose()
            print('Running {0} on file {1}'.format(str(parser), fn))
        except Exception:
            # FIX: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; narrowed so Ctrl-C still interrupts the run.
            print('WARNING - skipping {0}; file not properly configured'.format(fn))
            continue
        for x in parser.parse():
            parsed.build_data(x, str(parser), data_object)
        # data_tuple[2] may be a single DataSet or a list of them; normalize
        # to a list so the pickle/bookkeeping logic is written only once.
        if isinstance(data_object, list):
            objects = data_object
        elif isinstance(data_object, DataSet):
            objects = [data_object]
        else:
            objects = []
        for o in objects:
            o.source_file = fn
            # Persist the populated object for later phases / reruns.
            with open(str(o) + '.' + args.parsed_pickle, 'wb') as fp:
                pickle.dump(o, fp, pickle.HIGHEST_PROTOCOL)
            object_dict[str(o) + '_data'] = o
if fn in baseline_data: try: data_tuple = baseline_data.get(fn) data_object = data_tuple[2] parser = data_tuple[PARSER_TYPE]('datasets/' + fn) if verbose: parser.is_verbose() print('Running {0} on file {1}'.format( str(parser), fn)) except: print( 'WARNING - skipping {0}; file not properly configured'. format(fn)) continue for x in parser.parse(): parsed.build_data(x, str(parser), data_object) # if data_tuple[2] is a list of objects, handle list if isinstance(data_object, list): for o in data_object: o.source_file = fn with open(str(o) + '.' + args.parsed_pickle, 'wb') as f: pickle.dump(o, f, pickle.HIGHEST_PROTOCOL) object_dict[str(o) + '_data'] = o continue # if data_tuple[2] is a single object elif isinstance(data_object, DataSet): data_object.source_file = fn with open( str(data_object) + '.' + args.parsed_pickle, 'wb') as f: