Beispiel #1
0
def read_ratios(params, args_in):
    """Read the ratios matrix named by ``args_in.ratios``.

    The matrix is fetched with ``util.read_url`` when the name starts with
    ``http://`` or ``https://`` and read with ``util.read_dfile`` otherwise.

    Args:
        params: mapping that provides the 'normalize_ratios' and
            'case_sensitive' settings.
        args_in: argument object that provides ``ratios`` (file name or
            URL) and ``case_sensitive``.

    Returns:
        Whatever ``dm.DataMatrixFactory.create_from`` builds from the input.
    """
    if not params['normalize_ratios']:
        ratio_filters = []
    elif test_data_change(params, args_in):
        # Turn off the nochange_filter if you're resuming a run and have
        # changed the data matrix
        ratio_filters = [dm.center_scale_filter]
    else:
        ratio_filters = [dm.nochange_filter, dm.center_scale_filter]

    matrix_factory = dm.DataMatrixFactory(ratio_filters)
    matrix_filename = args_in.ratios

    # str.startswith accepts a tuple of prefixes, so secure URLs are
    # downloaded as well instead of being mistaken for local paths.
    if matrix_filename.startswith(('http://', 'https://')):
        # NOTE(review): the payload is handed on undecoded; if
        # util.read_url returns bytes it may need decoding first - confirm.
        indata = util.read_url(matrix_filename)
        infile = util.dfile_from_text(indata, has_header=True, quote='\"')
    else:
        infile = util.read_dfile(matrix_filename, has_header=True, quote='\"')

    # Either source may switch case sensitivity on; the factory receives a
    # plain boolean as its second argument.
    case_sensitive = bool(params['case_sensitive'] or args_in.case_sensitive)
    return matrix_factory.create_from(infile, case_sensitive)
Beispiel #2
0
    def read_features(self, feature_ids):
        """Returns a dictionary that maps feature ids to features. Only
        ids that are contained in feature_ids and that occur in the RSAT
        feature data are included"""

        def make_feature(row):
            """Builds a feature together with its contig location from
            the fields of a single DelimitedFile line"""
            contig = row[3]
            is_reverse = row[6] == 'R'

            # positions may carry a leading '<' or '>' marker, which has
            # to be removed before the integer conversion
            pos1 = int(row[4].lstrip('<>'))
            pos2 = int(row[5].lstrip('<>'))
            location = st.Location(contig, pos1, pos2, is_reverse)
            return st.Feature(row[0], row[1], row[2], location)

        dfile = util.dfile_from_text(self.__rsat_info.get_features(), comment='--')
        # the first column holds the feature id; a later line with the
        # same id replaces an earlier one
        return {row[0]: make_feature(row)
                for row in dfile.lines if row[0] in feature_ids}
Beispiel #3
0
    def read_features(self, feature_ids):
        """Returns a dictionary from feature id to feature, restricted
        to the ids in feature_ids that are found in the RSAT feature
        data"""

        def to_feature(fields):
            """Turns the fields of one DelimitedFile line into a feature
            with its contig location"""
            contig = fields[3]
            is_reverse = fields[6] == 'R'

            # a position can be prefixed with '<' or '>', drop the prefix
            # before converting to int
            pos1 = int(fields[4].lstrip('<>'))
            pos2 = int(fields[5].lstrip('<>'))
            return st.Feature(fields[0], fields[1], fields[2],
                              st.Location(contig, pos1, pos2, is_reverse))

        result = {}
        dfile = util.dfile_from_text(self.__rsat_info.get_features(), comment='--')
        for fields in dfile.lines:
            # column 0 is the feature id, lines for other ids are skipped
            if fields[0] not in feature_ids:
                continue
            result[fields[0]] = to_feature(fields)
        return result
Beispiel #4
0
def read_ratios(params, args_in):
    """Read the ratios matrix named by ``args_in.ratios``.

    The matrix is fetched with ``util.read_url`` (and decoded as UTF-8) when
    the name starts with ``http://`` or ``https://`` and read with
    ``util.read_dfile`` otherwise.

    Args:
        params: mapping that provides the 'normalize_ratios' and
            'case_sensitive' settings.
        args_in: argument object that provides ``ratios`` (file name or
            URL) and ``case_sensitive``.

    Returns:
        Whatever ``dm.DataMatrixFactory.create_from`` builds from the input.
    """
    if not params['normalize_ratios']:
        ratio_filters = []
    elif test_data_change(params, args_in):
        # Turn off the nochange_filter if you're resuming a run and have
        # changed the data matrix
        ratio_filters = [dm.center_scale_filter]
    else:
        ratio_filters = [dm.nochange_filter, dm.center_scale_filter]

    matrix_factory = dm.DataMatrixFactory(ratio_filters)
    matrix_filename = args_in.ratios

    # str.startswith accepts a tuple of prefixes, so secure URLs are
    # downloaded as well instead of being mistaken for local paths.
    if matrix_filename.startswith(('http://', 'https://')):
        indata = util.read_url(matrix_filename).decode('utf-8')
        infile = util.dfile_from_text(indata, has_header=True, quote='\"')
    else:
        infile = util.read_dfile(matrix_filename, has_header=True, quote='\"')

    # Either source may switch case sensitivity on; the factory receives a
    # plain boolean as its second argument.
    case_sensitive = bool(params['case_sensitive'] or args_in.case_sensitive)
    return matrix_factory.create_from(infile, case_sensitive)
Beispiel #5
0
 def test_create_from_text(self):
     """Parses two lines of tab delimited text without a header and
     checks both parsed rows as well as the missing header"""
     dfile = util.dfile_from_text("value11\tvalue12\nvalue21\tvalue22")
     lines = dfile.lines
     # assertEqual is used because the assertEquals alias is deprecated
     # and no longer exists in unittest as of Python 3.12
     self.assertEqual(["value11", "value12"], lines[0])
     self.assertEqual(["value21", "value22"], lines[1])
     self.assertIsNone(dfile.header)
Beispiel #6
0
 def test_create_from_text(self):
     """Parses two lines of tab delimited text without a header and
     checks both parsed rows as well as the missing header"""
     dfile = util.dfile_from_text(
         "value11\tvalue12\nvalue21\tvalue22")
     lines = dfile.lines
     # assertEqual is used because the assertEquals alias is deprecated
     # and no longer exists in unittest as of Python 3.12
     self.assertEqual(["value11", "value12"], lines[0])
     self.assertEqual(["value21", "value22"], lines[1])
     self.assertIsNone(dfile.header)
Beispiel #7
0
 def thesaurus(self):
     """returns the thesaurus built from the RSAT feature_names data.
     The result is kept on the instance and reused as long as it is
     truthy, because it is requested many times
     """
     if self.__synonyms:
         return self.__synonyms

     names_dfile = util.dfile_from_text(
         self.__rsat_info.get_feature_names(), comment="--")
     self.__synonyms = thesaurus.create_from_rsat_feature_names(
         names_dfile, [thesaurus.strip_vng_modification])
     return self.__synonyms
Beispiel #8
0
 def thesaurus(self):
     """returns the thesaurus built from the RSAT feature_names data.
     The result is kept on the instance and reused as long as it is
     truthy, because it is requested many times
     """
     if self.__synonyms:
         return self.__synonyms

     names_dfile = util.dfile_from_text(
         self.__rsat_info.get_feature_names(), comment='--')
     self.__synonyms = thesaurus.create_from_rsat_feature_names(
         names_dfile, [thesaurus.strip_vng_modification])
     return self.__synonyms
Beispiel #9
0
def __get_predictions(microbes_online, organism):
    """fetches the MicrobesOnline operon predictions for the organism and
    returns a list of gene pairs, one for each line whose column 6 is
    'TRUE'"""
    preds_text = microbes_online.get_operon_predictions_for(
        organism.taxonomy_id())
    dfile = util.dfile_from_text(preds_text, has_header=True)
    code = organism.code

    preds = []
    for row in dfile.lines:
        if row[6] != 'TRUE':
            continue
        # both gene names (columns 2 and 3) are run through the
        # organism-specific patch
        first_gene = patches.patch_mo_gene(code, row[2])
        second_gene = patches.patch_mo_gene(code, row[3])
        preds.append((first_gene, second_gene))

    logging.info("%d prediction pairs read", len(preds))
    return preds
def __get_predictions(microbes_online, organism):
    """fetches the MicrobesOnline operon predictions for the organism and
    returns a list of gene pairs, one for each line whose column 6 is
    'TRUE'"""
    preds_text = microbes_online.get_operon_predictions_for(organism.taxonomy_id())
    dfile = util.dfile_from_text(preds_text, has_header=True)
    code = organism.code

    preds = []
    for row in dfile.lines:
        if row[6] != "TRUE":
            continue
        # both gene names (columns 2 and 3) are run through the
        # organism-specific patch
        first_gene = patches.patch_mo_gene(code, row[2])
        second_gene = patches.patch_mo_gene(code, row[3])
        preds.append((first_gene, second_gene))

    logging.info("%d prediction pairs read", len(preds))
    return preds
Beispiel #11
0
 def get_taxonomy_id(self, organism):
     """returns the first field of the first line of the organism file,
     passed through patches.patch_ncbi_taxonomy. The file is fetched
     via the URL cache"""
     logging.debug('RSAT - get_organism_names(%s)', organism)
     cache_file = "/".join([self.cache_dir, 'rsatnames_' + organism])
     # Since 02-19-15 ORGANISM_PATH is read here instead of
     # ORGANISM_NAMES_PATH, because the organism_names file is missing
     # for h.pylori
     url = "/".join([self.base_url, RsatDatabase.DIR_PATH, organism,
                     RsatDatabase.ORGANISM_PATH])
     raw = util.read_url_cached(url, cache_file)
     dfile = util.dfile_from_text(raw.decode('utf-8'), comment='--')
     first_line = dfile.lines[0]
     return patches.patch_ncbi_taxonomy(first_line[0])
Beispiel #12
0
 def get_taxonomy_id(self, organism):
     """returns the first field of the first line of the organism file,
     passed through patches.patch_ncbi_taxonomy. The file is fetched
     via the URL cache"""
     logging.debug('RSAT - get_organism_names(%s)', organism)
     cache_file = "/".join([self.cache_dir, 'rsatnames_' + organism])
     # Since 02-19-15 ORGANISM_PATH is read here instead of
     # ORGANISM_NAMES_PATH, because the organism_names file is missing
     # for h.pylori
     url = "/".join([self.base_url, RsatDatabase.DIR_PATH, organism,
                     RsatDatabase.ORGANISM_PATH])
     raw = util.read_url_cached(url, cache_file)
     dfile = util.dfile_from_text(raw.decode('utf-8'), comment='--')
     first_line = dfile.lines[0]
     return patches.patch_ncbi_taxonomy(first_line[0])
Beispiel #13
0
def read_ratios(params, args):
    """Read the ratios matrix named by ``args.ratios``.

    The matrix is fetched with ``util.read_url`` when the name starts with
    ``http://`` or ``https://`` and read with ``util.read_dfile`` otherwise.

    Args:
        params: mapping that provides the 'normalize_ratios' and
            'case_sensitive' settings.
        args: argument object that provides ``ratios`` (file name or URL)
            and ``case_sensitive``.

    Returns:
        Whatever ``dm.DataMatrixFactory.create_from`` builds from the input.
    """
    if params['normalize_ratios']:
        ratio_filters = [dm.nochange_filter, dm.center_scale_filter]
    else:
        ratio_filters = []

    matrix_factory = dm.DataMatrixFactory(ratio_filters)
    matrix_filename = args.ratios

    # str.startswith accepts a tuple of prefixes, so secure URLs are
    # downloaded as well instead of being mistaken for local paths.
    if matrix_filename.startswith(('http://', 'https://')):
        # NOTE(review): the payload is handed on undecoded; if
        # util.read_url returns bytes it may need decoding first - confirm.
        indata = util.read_url(matrix_filename)
        infile = util.dfile_from_text(indata, has_header=True, quote='\"')
    else:
        infile = util.read_dfile(matrix_filename, has_header=True, quote='\"')

    # Either source may switch case sensitivity on; the factory receives a
    # plain boolean as its second argument.
    case_sensitive = bool(params['case_sensitive'] or args.case_sensitive)
    return matrix_factory.create_from(infile, case_sensitive)
Beispiel #14
0
        if not args.rsat_dir:
            args.nomotifs = True
        if not args.string and not args.operons:
            args.nonetworks = True

    # user overrides in config files
    if args.config:
        config.read(args.config)

    matrix_factory = dm.DataMatrixFactory([dm.nochange_filter,
                                           dm.center_scale_filter])
    matrix_filename = args.ratios

    if matrix_filename.startswith('http://'):
        indata = util.read_url(matrix_filename)
        infile = util.dfile_from_text(indata, has_header=True, quote='\"')
    else:
        infile = util.read_dfile(matrix_filename, has_header=True, quote='\"')

    matrix = matrix_factory.create_from(infile)
    infile = None

    # override number of clusters either on the command line or through
    # the config file
    try:
        num_clusters = config.getint("General", "num_clusters")
    except:
        num_clusters = args.numclusters

    cmonkey_run = cmr.CMonkeyRun(args.organism, matrix,
                                 string_file=args.string,
Beispiel #15
0
        if not args.rsat_dir:
            args.nomotifs = True
        if not args.string and not args.operons:
            args.nonetworks = True

    # user overrides in config files
    if args.config:
        config.read(args.config)

    matrix_factory = dm.DataMatrixFactory(
        [dm.nochange_filter, dm.center_scale_filter])
    matrix_filename = args.ratios

    if matrix_filename.startswith('http://'):
        indata = util.read_url(matrix_filename)
        infile = util.dfile_from_text(indata, has_header=True, quote='\"')
    else:
        infile = util.read_dfile(matrix_filename, has_header=True, quote='\"')

    matrix = matrix_factory.create_from(infile)
    infile = None

    # override number of clusters either on the command line or through
    # the config file
    try:
        num_clusters = config.getint("General", "num_clusters")
    except:
        num_clusters = args.numclusters

    cmonkey_run = cmr.CMonkeyRun(args.organism,
                                 matrix,