Esempio n. 1
0
print("\n======= Phase II, parsing data =======")
# Parsed records are handed to the parsed module for storage. That module
# could later be replaced or re-implemented on top of something like DBM
# to help with memory usage.
interval_time = time.time()
working_dir = os.getcwd()
for _root, _dirs, filenames in os.walk(working_dir):
    for filename in filenames:
        # Only file names registered in baseline_data are parsed.
        if filename not in baseline_data:
            continue
        data_tuple = baseline_data.get(filename)
        parser_class = data_tuple[PARSER_TYPE]
        # NOTE: the parser is always pointed at "datasets/<name>", whichever
        # directory the walk found the matching file name in.
        parser = parser_class("datasets/" + filename)
        if verbose:
            parser.is_verbose()
            print("Running " + str(parser))
        # Feed every parsed record to the store, tagged with the parser name.
        for record in parser.parse():
            parsed.build_data(record, str(parser))
elapsed_minutes = (time.time() - interval_time) / 60
print("Phase II ran in " + str(elapsed_minutes) + " minutes")

print("\n======= Phase III, building namespaces =======")
interval_time = time.time()
# Load the parsed data needed to build namespaces. The datasets are loaded
# in the order listed, and each result is bound to the name at the same
# position in the target tuple.
(
    ei,
    eh,
    hg,
    mg,
    rg,
    sp,
    af,
    g2,
    chebi,
    schem,
) = [
    parsed.load_data(dataset_name)
    for dataset_name in (
        "entrez_info",
        "entrez_history",
        "hgnc",
        "mgi",
        "rgd",
        "swiss",
        "affy",
        "gene2acc",
        "chebi",
        "schem",
    )
]
	# Parse every file registered in baseline_data, pickle the resulting data
	# object(s) to disk, and collect them in object_dict keyed by
	# '<str(object)>_data'.
	object_dict = {}
	for root, dirs, filenames in os.walk(working_dir):
		for fn in filenames:
			if fn in baseline_data:
				try:
					data_tuple = baseline_data.get(fn)
					# Index 2 holds the data object (or list of data objects)
					# that build_data populates for this file.
					data_object = data_tuple[2]
					parser = data_tuple[PARSER_TYPE]('datasets/'+fn)
					if verbose:
						parser.is_verbose()
						print('Running {0} on file {1}'.format(str(parser), fn))
				except Exception:
					# Catch only ordinary errors: a bare except would also
					# swallow KeyboardInterrupt / SystemExit and report them
					# as a configuration problem.
					print('WARNING - skipping {0}; file not properly configured'.format(fn))
					continue
				for x in parser.parse():
					parsed.build_data(x, str(parser), data_object)
				# if data_tuple[2] is a list of objects, handle list
				if isinstance(data_object, list):
					for o in data_object:
						# Record which source file the object was built from
						# before it is serialized.
						o.source_file = fn
						with open(str(o) + '.' + args.parsed_pickle, 'wb') as f:
							pickle.dump(o, f, pickle.HIGHEST_PROTOCOL)
						object_dict[str(o) + '_data'] = o
					continue
				# if data_tuple[2] is a single object
				elif isinstance(data_object, DataSet):
					data_object.source_file = fn
					with open(str(data_object) + '.' + args.parsed_pickle, 'wb') as f:
						pickle.dump(data_object, f, pickle.HIGHEST_PROTOCOL)
					object_dict[str(data_object) + '_data'] = data_object
	
Esempio n. 3
0
 if fn in baseline_data:
     try:
         data_tuple = baseline_data.get(fn)
         data_object = data_tuple[2]
         parser = data_tuple[PARSER_TYPE]('datasets/' + fn)
         if verbose:
             parser.is_verbose()
             print('Running {0} on file {1}'.format(
                 str(parser), fn))
     except:
         print(
             'WARNING - skipping {0}; file not properly configured'.
             format(fn))
         continue
     for x in parser.parse():
         parsed.build_data(x, str(parser), data_object)
     # if data_tuple[2] is a list of objects, handle list
     if isinstance(data_object, list):
         for o in data_object:
             o.source_file = fn
             with open(str(o) + '.' + args.parsed_pickle,
                       'wb') as f:
                 pickle.dump(o, f, pickle.HIGHEST_PROTOCOL)
             object_dict[str(o) + '_data'] = o
         continue
     # if data_tuple[2] is a single object
     elif isinstance(data_object, DataSet):
         data_object.source_file = fn
         with open(
                 str(data_object) + '.' + args.parsed_pickle,
                 'wb') as f: