def read_ratios(params, args_in):
    """Read the gene expression ratios matrix named by args_in.ratios.

    Builds a DataMatrixFactory with the filter chain selected by the
    'normalize_ratios' parameter, loads the matrix either from a URL or
    a local delimited file, and returns the resulting ratios matrix.

    Args:
        params: configuration dict; 'normalize_ratios' and
            'case_sensitive' keys are read here.
        args_in: parsed command line arguments; 'ratios' and
            'case_sensitive' attributes are read here.
    Returns:
        the ratios data matrix produced by the factory.
    """
    if params['normalize_ratios']:
        if test_data_change(params, args_in):
            # Turn off the nochange_filter when resuming a run after the
            # data matrix has changed.
            ratio_filters = [dm.center_scale_filter]
        else:
            ratio_filters = [dm.nochange_filter, dm.center_scale_filter]
    else:
        ratio_filters = []

    matrix_factory = dm.DataMatrixFactory(ratio_filters)
    matrix_filename = args_in.ratios

    if matrix_filename.startswith('http://'):
        # read_url() returns bytes under Python 3; decode before parsing
        # as text (consistent with the decoding read_ratios variant in
        # this file).
        indata = util.read_url(matrix_filename).decode('utf-8')
        infile = util.dfile_from_text(indata, has_header=True, quote='\"')
    else:
        infile = util.read_dfile(matrix_filename, has_header=True, quote='\"')

    # case sensitivity may come from either the params dict or the CLI args
    if params['case_sensitive'] or args_in.case_sensitive:
        return matrix_factory.create_from(infile, True)
    return matrix_factory.create_from(infile, False)
def read_features(self, feature_ids):
    """Return a dict mapping each requested feature id to its Feature.

    Parses the RSAT features file associated with this organism and keeps
    only the rows whose id appears in feature_ids.
    """
    def make_feature(row):
        """Build a Feature (with its Location) from one delimited-file row."""
        # feature positions can sometimes carry a leading '>' or '<'
        # marker, so strip it away before the int conversion
        start = int(row[4].lstrip('<>'))
        end = int(row[5].lstrip('<>'))
        on_reverse_strand = row[6] == 'R'
        location = st.Location(row[3], start, end, on_reverse_strand)
        return st.Feature(row[0], row[1], row[2], location)

    dfile = util.dfile_from_text(self.__rsat_info.get_features(),
                                 comment='--')
    return {row[0]: make_feature(row)
            for row in dfile.lines if row[0] in feature_ids}
def read_ratios(params, args_in):
    """Read in the gene expression ratios matrix.

    The filter chain handed to the DataMatrixFactory depends on the
    'normalize_ratios' setting and on whether the input data changed
    since the run being resumed.
    """
    filters = []
    if params['normalize_ratios']:
        if test_data_change(params, args_in) == True:
            # Turn off the nochange_filter if you're resuming a run and
            # have changed the data matrix.
            filters = [dm.center_scale_filter]
        else:
            filters = [dm.nochange_filter, dm.center_scale_filter]

    factory = dm.DataMatrixFactory(filters)
    source = args_in.ratios
    if source.startswith('http://'):
        # remote matrix: fetch, then decode the raw bytes as UTF-8 text
        text = util.read_url(source).decode('utf-8')
        infile = util.dfile_from_text(text, has_header=True, quote='\"')
    else:
        infile = util.read_dfile(source, has_header=True, quote='\"')

    if params['case_sensitive'] or args_in.case_sensitive:
        return factory.create_from(infile, True)
    return factory.create_from(infile, False)
def test_create_from_text(self):
    """Reads a tab-delimited file from a text string."""
    dfile = util.dfile_from_text("value11\tvalue12\nvalue21\tvalue22")
    lines = dfile.lines
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # use the canonical assertEqual instead.
    self.assertEqual(["value11", "value12"], lines[0])
    self.assertEqual(["value21", "value22"], lines[1])
    # no header was requested, so the header attribute must be None
    self.assertIsNone(dfile.header)
def test_create_from_text(self):
    """Reads a tab-delimited file from a text string."""
    dfile = util.dfile_from_text(
        "value11\tvalue12\nvalue21\tvalue22")
    lines = dfile.lines
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # use the canonical assertEqual instead.
    self.assertEqual(["value11", "value12"], lines[0])
    self.assertEqual(["value21", "value22"], lines[1])
    # no header was requested, so the header attribute must be None
    self.assertIsNone(dfile.header)
def thesaurus(self):
    """Return the synonym thesaurus built from the feature_names file.

    The thesaurus is computed lazily and cached on the instance, because
    it is used many times.
    """
    if self.__synonyms:
        return self.__synonyms

    names_dfile = util.dfile_from_text(
        self.__rsat_info.get_feature_names(), comment="--")
    self.__synonyms = thesaurus.create_from_rsat_feature_names(
        names_dfile, [thesaurus.strip_vng_modification])
    return self.__synonyms
def thesaurus(self):
    """Return the synonym thesaurus read from the feature_names file.

    Built on first access and cached on the instance, since callers ask
    for it many times.
    """
    if not self.__synonyms:
        # build once: parse the '--'-commented feature_names data and
        # derive the synonym map from it
        dfile = util.dfile_from_text(
            self.__rsat_info.get_feature_names(), comment='--')
        filters = [thesaurus.strip_vng_modification]
        self.__synonyms = thesaurus.create_from_rsat_feature_names(
            dfile, filters)
    return self.__synonyms
def __get_predictions(microbes_online, organism):
    """Read the operon predictions for an organism from MicrobesOnline.

    Returns a list of gene pairs taken from rows whose prediction flag
    is 'TRUE', with MicrobesOnline gene names patched for this organism.
    """
    raw_text = microbes_online.get_operon_predictions_for(
        organism.taxonomy_id())
    dfile = util.dfile_from_text(raw_text, has_header=True)
    code = organism.code
    pairs = []
    for row in dfile.lines:
        # column 6 carries the prediction flag; keep confirmed rows only
        if row[6] == 'TRUE':
            pairs.append((patches.patch_mo_gene(code, row[2]),
                          patches.patch_mo_gene(code, row[3])))
    logging.info("%d prediction pairs read", len(pairs))
    return pairs
def __get_predictions(microbes_online, organism):
    """Read the operon predictions for an organism from MicrobesOnline.

    Only rows flagged "TRUE" are kept; each contributes one pair of
    patched gene names.
    """
    preds_text = microbes_online.get_operon_predictions_for(
        organism.taxonomy_id())
    dfile = util.dfile_from_text(preds_text, has_header=True)
    code = organism.code

    def patched_pair(row):
        """Map one prediction row to a (gene, gene) tuple."""
        return (patches.patch_mo_gene(code, row[2]),
                patches.patch_mo_gene(code, row[3]))

    preds = [patched_pair(row) for row in dfile.lines if row[6] == "TRUE"]
    logging.info("%d prediction pairs read", len(preds))
    return preds
def get_taxonomy_id(self, organism):
    """Return the NCBI taxonomy id for the given organism.

    Downloads (with local caching) the organism file from the RSAT
    database, parses it as a '--'-commented delimited file and returns
    the patched taxonomy id taken from the first column of its first
    line.

    NOTE(review): the debug message says get_organism_names, but since
    the 02-19-15 change below this actually reads ORGANISM_PATH, not
    ORGANISM_NAMES_PATH.
    """
    logging.debug('RSAT - get_organism_names(%s)', organism)
    # per-organism cache file so repeated runs avoid re-downloading
    cache_file = "/".join([self.cache_dir, 'rsatnames_' + organism])
    #Changed 02-19-15 due to missing organism_names file in h.pylori
    #text = util.read_url_cached(
    #    "/".join([self.base_url, RsatDatabase.DIR_PATH, organism,
    #              RsatDatabase.ORGANISM_NAMES_PATH]), cache_file)
    text = util.read_url_cached(
        "/".join([self.base_url, RsatDatabase.DIR_PATH, organism,
                  RsatDatabase.ORGANISM_PATH]),
        cache_file).decode('utf-8')
    organism_names_dfile = util.dfile_from_text(text, comment='--')
    # first column of the first data line holds the taxonomy id;
    # patch_ncbi_taxonomy applies known corrections to it
    return patches.patch_ncbi_taxonomy(organism_names_dfile.lines[0][0])
def read_ratios(params, args):
    """Read the gene expression ratios matrix named by args.ratios.

    Applies the standard filter chain (nochange + center/scale) when
    'normalize_ratios' is set, loads the matrix from a URL or a local
    delimited file, and returns the resulting ratios matrix.

    Args:
        params: configuration dict; 'normalize_ratios' and
            'case_sensitive' keys are read here.
        args: parsed command line arguments; 'ratios' and
            'case_sensitive' attributes are read here.
    Returns:
        the ratios data matrix produced by the factory.
    """
    if params['normalize_ratios']:
        ratio_filters = [dm.nochange_filter, dm.center_scale_filter]
    else:
        ratio_filters = []

    matrix_factory = dm.DataMatrixFactory(ratio_filters)
    matrix_filename = args.ratios

    if matrix_filename.startswith('http://'):
        # read_url() returns bytes under Python 3; decode before parsing
        # as text (consistent with the decoding read_ratios variant in
        # this file).
        indata = util.read_url(matrix_filename).decode('utf-8')
        infile = util.dfile_from_text(indata, has_header=True, quote='\"')
    else:
        infile = util.read_dfile(matrix_filename, has_header=True, quote='\"')

    if params['case_sensitive'] or args.case_sensitive:
        return matrix_factory.create_from(infile, True)
    return matrix_factory.create_from(infile, False)
if not args.rsat_dir: args.nomotifs = True if not args.string and not args.operons: args.nonetworks = True # user overrides in config files if args.config: config.read(args.config) matrix_factory = dm.DataMatrixFactory([dm.nochange_filter, dm.center_scale_filter]) matrix_filename = args.ratios if matrix_filename.startswith('http://'): indata = util.read_url(matrix_filename) infile = util.dfile_from_text(indata, has_header=True, quote='\"') else: infile = util.read_dfile(matrix_filename, has_header=True, quote='\"') matrix = matrix_factory.create_from(infile) infile = None # override number of clusters either on the command line or through # the config file try: num_clusters = config.getint("General", "num_clusters") except: num_clusters = args.numclusters cmonkey_run = cmr.CMonkeyRun(args.organism, matrix, string_file=args.string,
if not args.rsat_dir: args.nomotifs = True if not args.string and not args.operons: args.nonetworks = True # user overrides in config files if args.config: config.read(args.config) matrix_factory = dm.DataMatrixFactory( [dm.nochange_filter, dm.center_scale_filter]) matrix_filename = args.ratios if matrix_filename.startswith('http://'): indata = util.read_url(matrix_filename) infile = util.dfile_from_text(indata, has_header=True, quote='\"') else: infile = util.read_dfile(matrix_filename, has_header=True, quote='\"') matrix = matrix_factory.create_from(infile) infile = None # override number of clusters either on the command line or through # the config file try: num_clusters = config.getint("General", "num_clusters") except: num_clusters = args.numclusters cmonkey_run = cmr.CMonkeyRun(args.organism, matrix,