Ejemplo n.º 1
0
    def get_feature(marker_id, dataset_id, dataset_name, feature_type,
                    mapping_result):
        """Build the enrichment-row mapping object that matches feature_type.

        Genetic markers yield a MarkerMapping, genes a GeneMapping (created
        with an empty annotation list) and anchored features an
        AnchoredMapping. Every object is built with
        FeatureMapping.ROW_TYPE_ENRICHMENT as its row type.

        Raises:
            m2pException: when feature_type is none of the three dataset
                types handled here.
        """
        # Guard-clause dispatch: each recognised type returns straight away.
        if feature_type == DatasetsConfig.DATASET_TYPE_GENETIC_MARKER:
            return MarkerMapping(marker_id, dataset_id, dataset_name,
                                 feature_type, mapping_result,
                                 FeatureMapping.ROW_TYPE_ENRICHMENT)

        if feature_type == DatasetsConfig.DATASET_TYPE_GENE:
            return GeneMapping(marker_id, dataset_id, dataset_name,
                               feature_type, mapping_result,
                               FeatureMapping.ROW_TYPE_ENRICHMENT,
                               annots=[])

        if feature_type == DatasetsConfig.DATASET_TYPE_ANCHORED:
            return AnchoredMapping(marker_id, dataset_id, dataset_name,
                                   feature_type, mapping_result,
                                   FeatureMapping.ROW_TYPE_ENRICHMENT)

        raise m2pException("Unrecognized feature type " +
                           str(feature_type) + ".")
Ejemplo n.º 2
0
 def check_sort_param(self, map_config, sort_param, DEFAULT_SORT_PARAM):
     """Return the sort unit to use for a map, given the requested one.

     The requested sort_param is returned when it equals the map's default
     sort unit (no position check is made in that case), or when it is the
     cM/bp unit and the map has positions of that kind. Otherwise the map's
     default is used, provided the map has positions of that kind; a warning
     is written to stderr unless sort_param equals DEFAULT_SORT_PARAM.

     Raises:
         m2pException: when the map's default sort unit is not backed by
             positions of that kind either.
     """
     map_name = map_config.get_name()
     has_cm = map_config.has_cm_pos()
     has_bp = map_config.has_bp_pos()
     default_sort = map_config.get_default_sort_by()
     
     # The map's own default is accepted as is.
     if sort_param == default_sort:
         return sort_param
     
     # The requested unit has priority whenever the map carries it.
     if sort_param == MapTypes.MAP_SORT_PARAM_CM and has_cm:
         return sort_param
     if sort_param == MapTypes.MAP_SORT_PARAM_BP and has_bp:
         return sort_param
     
     # Requested unit unusable: fall back to the map default, warning only
     # when the caller explicitly asked for something else.
     if sort_param != DEFAULT_SORT_PARAM:
         sys.stderr.write("WARNING: the sort parameter "+sort_param+" is not compatible with map "+map_name+". Using default map sort parameter...\n")
     
     if default_sort == MapTypes.MAP_SORT_PARAM_CM and has_cm:
         return default_sort
     if default_sort == MapTypes.MAP_SORT_PARAM_BP and has_bp:
         return default_sort
     
     raise m2pException("Map default sort configure as \""+default_sort+"\" assigned to a map which has not such kind of position.")
Ejemplo n.º 3
0
def load_conf(conf_file, verbose=False):
    """Read a space-separated configuration file into a list of rows.

    Lines starting with "#" and blank (whitespace-only) lines are skipped.
    Every other line is stripped and split on single spaces.

    Args:
        conf_file: path of the configuration file.
        verbose: when true, progress and each kept line are echoed to stderr.

    Returns:
        A list holding one list of string fields per kept line.

    Raises:
        m2pException: if anything fails while opening or reading the file.
    """
    if verbose:
        sys.stderr.write("Loading configuration file " + conf_file + "...\n")

    conf_rows = []
    try:
        # The context manager closes the handle even when reading fails
        # part-way; previously the file was never closed explicitly.
        with open(conf_file, 'r') as conf_handle:
            for line in conf_handle:
                stripped = line.strip()
                if line.startswith("#") or not stripped:
                    continue
                if verbose:
                    sys.stderr.write("\t conf line: " + stripped + "\n")

                conf_rows.append(stripped.split(" "))

    except Exception:
        # Any failure is reported to callers as the project exception.
        raise m2pException("Error loading configuration file " + conf_file +
                           ".")

    return conf_rows


## END
Ejemplo n.º 4
0
 def _load_config(self, config_file):
     """Populate self._config_dict and self._config_list from config_file.

     Each row returned by load_conf becomes a DatasetConfig, stored in
     self._config_dict under its dataset id; ids are also appended to
     self._config_list in file order. A dataset name starting with ">" has
     that character removed and the dataset is flagged with
     set_ignore_build(True).

     Raises:
         m2pException: when a dataset id appears more than once.
     """
     self._config_dict = {}
     self._config_list = []
     
     for row in load_conf(config_file, self._verbose): # data_utils.load_conf
         name = row[DatasetsConfig.DATASET_NAME]
         ident = row[DatasetsConfig.DATASET_ID]
         
         # Remaining fields are read in the same order as before; databases
         # and prefixes are comma-separated lists.
         dataset = DatasetConfig(name,
                                 ident,
                                 row[DatasetsConfig.DATASET_TYPE],
                                 row[DatasetsConfig.FILE_PATH],
                                 row[DatasetsConfig.FILE_TYPE],
                                 row[DatasetsConfig.DATABASES].strip().split(","),
                                 row[DatasetsConfig.SYNONYMS],
                                 row[DatasetsConfig.PREFIXES].strip().split(","))
         
         if name.startswith(">"):
             dataset.set_dataset_name(name[1:]) # drop the ">" marker from the name
             dataset.set_ignore_build(True) # to be ignored by the build datasets script
         
         if ident in self._config_dict:
             raise m2pException("Duplicated dataset "+ident+" in configuration file "+config_file+".")
         
         self._config_dict[ident] = dataset
         self._config_list.append(ident)
     
     return
Ejemplo n.º 5
0
 def align(self, fasta_path, db, ref_type, threshold_id, threshold_cov):
     """Align the sequences in fasta_path against db and return the hits.

     The executable is self._app_path for REF_TYPE_STD references and
     self._gmapl_app_path for REF_TYPE_BIG ones. Hits are stored in
     self._results_hits; the result of filtering the FASTA headers against
     the hit query ids is stored in self._results_unaligned. Progress
     counts are written to stderr.

     Returns:
         Whatever self.get_hits() returns.

     Raises:
         m2pException: when ref_type is neither REF_TYPE_STD nor
             REF_TYPE_BIG.
     """
     sys.stderr.write("\n")
     
     fasta_headers = alignment_utils.get_fasta_headers(fasta_path)
     
     sys.stderr.write("GMAPAligner: DB --> "+str(db)+"\n")
     sys.stderr.write("GMAPAligner: to align "+str(len(fasta_headers))+"\n")
     
     # use GMAP or GMAPL
     if ref_type == REF_TYPE_STD:
         app_path = self._app_path
     elif ref_type == REF_TYPE_BIG:
         app_path = self._gmapl_app_path
     else:
         # str() so a non-string ref_type (e.g. None) still produces the
         # intended m2pException instead of a TypeError.
         raise m2pException("GMAPAligner: Unrecognized ref type "+str(ref_type)+".")
     
     # get_hits from m2p_gmap.py
     self._results_hits = m2p_gmap.get_best_score_hits(app_path, self._n_threads, fasta_path,
                                                       self._dbs_path, db,
                                                       threshold_id, threshold_cov,
                                                       self._verbose)
     
     query_list = [hit.get_query_id() for hit in self._results_hits]
     
     # Count distinct queries by the first space-delimited token of each id.
     aligned_ids = set(query_id.split(" ")[0] for query_id in query_list)
     sys.stderr.write("GMAPAligner: aligned "+str(len(aligned_ids))+"\n")
     
     self._results_unaligned = alignment_utils.filter_list(fasta_headers, query_list)
     
     sys.stderr.write("GMAPAligner: no hits "+str(len(self._results_unaligned))+"\n")
     
     return self.get_hits()
Ejemplo n.º 6
0
 def _map_intervals(self, sorted_map, map_sort_by, extend_window):
     """Create an interval for every position of sorted_map.

     Unlike a merging strategy, no position is compared with its
     neighbours: each one gets its own interval from
     self._get_new_interval, which is then handed to
     self._append_interval.

     Side effects: sets self.MAP_UNIT according to map_sort_by and writes
     progress messages to stderr.

     Returns:
         The list of intervals collected through self._append_interval.

     Raises:
         m2pException: when map_sort_by is neither the bp nor the cM sort
             parameter.
     """
     if self._verbose: sys.stderr.write("MarkerEnricher: creating intervals on markers\n")
     
     sys.stderr.write("MarkerEnricher: map sort by "+str(map_sort_by)+"\n")
     
     if map_sort_by == MapTypes.MAP_SORT_PARAM_BP:
         self.MAP_UNIT = self.MAP_UNIT_PHYSICAL
     elif map_sort_by == MapTypes.MAP_SORT_PARAM_CM:
         self.MAP_UNIT = self.MAP_UNIT_GENETIC
     else:
         raise m2pException("Unrecognized map sort unit "+str(map_sort_by)+".")
     
     map_intervals = []
     
     # One independent interval per map position.
     for map_position in sorted_map:
         pos_chr = map_position.get_chrom_name()
         pos_pos = map_position.get_sort_pos(map_sort_by)
         pos_end_pos = map_position.get_sort_end_pos(map_sort_by)
         
         interval = self._get_new_interval(map_position, pos_chr, pos_pos, pos_end_pos, extend_window)
         
         self._append_interval(map_intervals, interval)
     
     # Prefix aligned with the other messages of this method (it previously
     # read "MapEnricher").
     sys.stderr.write("MarkerEnricher: "+str(len(map_intervals))+" intervals created.\n")
     
     return map_intervals
Ejemplo n.º 7
0
 def get_search_engine(search_type, maps_path, best_score_param, databases_config,
                       aligner_list, threshold_id, threshold_cov, n_threads, verbose = False):
     """Create the search engine that corresponds to search_type.

     Greedy and exhaustive searches use the best-score alignment type when
     best_score_param is truthy and the greedy alignment type otherwise;
     hierarchical searches always use the hierarchical alignment type.

     Returns:
         A SearchEngineGreedy or SearchEngineExhaustive instance.

     Raises:
         m2pException: when search_type is not a recognised search type.
     """
     if search_type == MapsConfig.SEARCH_TYPE_GREEDY:
         engine_class = SearchEngineGreedy
         alignment_type = ALIGNMENT_TYPE_BEST_SCORE if best_score_param else ALIGNMENT_TYPE_GREEDY
         
     elif search_type == MapsConfig.SEARCH_TYPE_HIERARCHICAL:
         # NOTE(review): hierarchical searches are built with
         # SearchEngineGreedy, exactly as before - confirm this is intended.
         engine_class = SearchEngineGreedy
         alignment_type = ALIGNMENT_TYPE_HIERARCHICAL
         
     elif search_type == MapsConfig.SEARCH_TYPE_EXHAUSTIVE:
         engine_class = SearchEngineExhaustive
         alignment_type = ALIGNMENT_TYPE_BEST_SCORE if best_score_param else ALIGNMENT_TYPE_GREEDY
         
     else:
         # str() so a non-string search_type still raises m2pException
         # rather than a TypeError from the concatenation.
         raise m2pException("Unrecognized search type "+str(search_type)+".")
     
     # Single construction point: every engine takes the same arguments.
     return engine_class(maps_path, best_score_param, databases_config, aligner_list,
                         threshold_id, threshold_cov, n_threads, alignment_type, verbose)
Ejemplo n.º 8
0
    def get_alignment_engine(search_type, aligner_list, paths_config,
                             ref_type_param, n_threads, verbose):
        """Create the alignment engine that corresponds to search_type.

        Returns:
            A GreedyEngine, HierarchicalEngine or BestScoreEngine built with
            the remaining arguments.

        Raises:
            m2pException: when search_type is not one of the three alignment
                types handled here.
        """
        if search_type == ALIGNMENT_TYPE_GREEDY:
            engine_class = GreedyEngine
        elif search_type == ALIGNMENT_TYPE_HIERARCHICAL:
            engine_class = HierarchicalEngine
        elif search_type == ALIGNMENT_TYPE_BEST_SCORE:
            engine_class = BestScoreEngine
        else:
            # str() so a non-string search_type still raises m2pException
            # rather than a TypeError from the concatenation.
            raise m2pException("Unrecognized search type " +
                               str(search_type) + ".")

        # All engines share the same constructor arguments.
        return engine_class(aligner_list, paths_config, ref_type_param,
                            n_threads, verbose)
Ejemplo n.º 9
0
 def _map_intervals(self, sorted_map, map_sort_by, extend_window):
     """Build intervals around map positions, merging overlapping neighbours.

     Each position gets a new interval from self._get_new_interval. While
     iterating, the interval of the current position is compared with the
     pending interval of the previous position: if both are on the same
     chromosome and MapInterval.intervals_overlap says they overlap, the
     current position is added to the pending interval; otherwise the
     pending interval is passed to self._append_interval and the new one
     becomes pending.

     Side effects: sets self.MAP_UNIT according to map_sort_by and writes
     progress messages to stderr.

     Returns the list of intervals collected through self._append_interval.
     Raises m2pException when map_sort_by is neither the bp nor the cM sort
     parameter.

     NOTE(review): only adjacent positions are compared, so sorted_map is
     presumably ordered by chromosome and position - confirm with callers.
     """
     map_intervals = []
     
     if self._verbose: sys.stderr.write("MapEnricher: creating intervals around markers\n")
     
     sys.stderr.write("MapEnricher: map sort by "+str(map_sort_by)+", extend interval "+str(extend_window)+"\n")
     
     # Record which unit (physical or genetic) the intervals are expressed in
     if map_sort_by == MapTypes.MAP_SORT_PARAM_BP:
         self.MAP_UNIT = self.MAP_UNIT_PHYSICAL
     elif map_sort_by == MapTypes.MAP_SORT_PARAM_CM:
         self.MAP_UNIT = self.MAP_UNIT_GENETIC
     else:
         raise m2pException("Unrecognized map sort unit "+str(map_sort_by)+".")
     
     # Loop over consecutive positions to compare them and create intervals.
     # prev_interval is the "pending" interval: it is only appended to the
     # result once a later position fails to merge into it.
     prev_position = None
     prev_interval = None
     for map_position in sorted_map:
         
         # NOTE: pos_marker is not used further down in this method
         pos_marker = map_position.get_marker_id() #position[MapFields.MARKER_NAME_POS]
         pos_chr = map_position.get_chrom_name() #position[MapFields.MARKER_CHR_POS]
         pos_pos = map_position.get_sort_pos(map_sort_by) #float(position[map_sort_by])
         pos_end_pos = map_position.get_sort_end_pos(map_sort_by)
         
         #if self._verbose: sys.stderr.write("\tMap position: "+str(map_position)+"\n")
         
         interval = self._get_new_interval(map_position, pos_chr, pos_pos, pos_end_pos, extend_window)
         
         ## check whether intervals overlap to each other
         # NOTE(review): this is a truthiness test, not an "is not None"
         # test - a position object that evaluates as false would be
         # treated as "no previous position".
         if prev_position:
             
             prev_chr = prev_position.get_chrom_name() #prev_position[MapFields.MARKER_CHR_POS]
             # Different chromosome: the pending interval is complete
             if pos_chr != prev_chr:
                 self._append_interval(map_intervals, prev_interval)
                 
             # The same chromosome...
             else:
                 # Check if there is overlap
                 if MapInterval.intervals_overlap(prev_interval, interval):
                     # Merge: add this position to the pending interval and keep
                     # that interval as the pending one for the next iteration
                     self._add_position_to_interval(prev_interval, map_position, pos_end_pos, extend_window)
                     interval = prev_interval
                     #if self._verbose: sys.stdout.write("\t\toverlap --> Updated interval "+str(prev_interval)+"\n")
                 else:
                     # No overlap: the pending interval is complete
                     self._append_interval(map_intervals, prev_interval)
         
         # If first interval
         # else: DO NOTHING
         
         prev_position = map_position
         prev_interval = interval
     
     # Append the last interval (still pending when the loop ends)
     if prev_interval:
         self._append_interval(map_intervals, prev_interval)
     
     sys.stderr.write("MapEnricher: "+str(len(map_intervals))+" intervals created.\n")
     
     return map_intervals
Ejemplo n.º 10
0
 def create_map(self, query_path, query_sets_ids, map_config, facade,
                sort_param, multiple_param, tmp_files_dir=None):
     """Placeholder that always raises m2pException.

     The message states that child classes are expected to provide the
     implementation; none of the arguments is used here.
     """
     not_implemented_msg = "To be implemented in child classes."
     raise m2pException(not_implemented_msg)
Ejemplo n.º 11
0
def __gmap(gmap_app_path, n_threads, threshold_id, threshold_cov, query_fasta_path, gmap_dbs_path, db_name, verbose = False):
    """Run GMAP on query_fasta_path against db_name and return compressed hits.

    Before running, checks that the file
    <gmap_dbs_path>/<db_name>/<db_name>.ref153positions exists. threshold_id
    and threshold_cov are divided by 100 before being passed as
    --min-identity and --min-trimmed-coverage (presumably they arrive as
    percentages - TODO confirm).

    Returns:
        The result of __compress(output, db_name), where output is GMAP's
        captured stdout.

    Raises:
        m2pException: when the database file is not found.
        Exception: when GMAP exits with a non-zero return code.
    """
    # CPCantalapiedra 201701
    ###### Check that DB is available for this aligner
    dbpath = gmap_dbs_path + "/" + db_name
    dbpathfile = dbpath + "/" + db_name + ".ref153positions"
    sys.stderr.write("Checking database: "+dbpath+" DB exists for "+ALIGNER+".\n")
    
    # isfile() is already False for a missing path, so one check suffices.
    if not os.path.isfile(dbpathfile):
        raise m2pException("DB path "+dbpath+" for "+ALIGNER+" aligner NOT FOUND.")
    
    # GMAP
    base_command = "".join([gmap_app_path,
                            " -t ", str(n_threads),
                            " -B 0 -n ", str(MAX_NUMBER_PATHS_PER_QUERY)])
    
    gmap_thres_id = float(threshold_id) / 100.0
    gmap_thres_cov = float(threshold_cov) / 100.0
    
    if verbose: sys.stderr.write("m2p_gmap: Thresholds: ID="+str(gmap_thres_id)+"; COV="+str(gmap_thres_cov)+"\n")
    
    filter_id = "--min-identity="+str(gmap_thres_id)
    filter_cov = "--min-trimmed-coverage="+str(gmap_thres_cov)
    db_option = "".join([" -d ", db_name])
    db_dir_option = "".join([" -D ", gmap_dbs_path])
    
    gmap_cmd = " ".join([base_command, filter_id, filter_cov, db_option, db_dir_option, query_fasta_path])
    
    if verbose: sys.stderr.write("m2p_gmap: Executing '"+gmap_cmd+"'\n")
    
    # In verbose mode GMAP's stderr goes straight to ours; otherwise it is
    # captured so it can be included in the error message below.
    # NOTE(review): the command is a shell string (shell=True); paths with
    # spaces or shell metacharacters are not quoted.
    err_target = sys.stderr if verbose else PIPE
    p = Popen(gmap_cmd, shell=True, stdout=PIPE, stderr=err_target)
    
    output, output_err = p.communicate()
    retValue = p.returncode
    
    if retValue != 0:
        if verbose:
            raise Exception("m2p_gmap: return != 0. "+gmap_cmd+"\n")
        else:
            raise Exception("m2p_gmap: return != 0. "+gmap_cmd+"\nError: "+str(output_err)+"\n")
    
    if verbose: sys.stderr.write("m2p_gmap: GMAP return value "+str(retValue)+"\n"+str(output_err)+"\n")
    
    return __compress(output, db_name)
Ejemplo n.º 12
0
 def get_results(self):
     """Return self._results.

     Raises:
         m2pException: when self._results is None, i.e. no retrieve method
             has stored results yet.
     """
     # Identity test: "!= None" would go through a custom __ne__ and could
     # misreport a loaded result as missing.
     if self._results is None:
         raise m2pException("DatasetsRetriever: error obtaining unloaded results. Call a retrieve method first.")
     
     return self._results
Ejemplo n.º 13
0
 def get_unmapped(self):
     """Return self._unmapped.

     Raises:
         m2pException: when self._unmapped is None, i.e. no retrieve method
             has stored the list of unmapped markers yet.
     """
     # Identity test: "!= None" would go through a custom __ne__ and could
     # misreport a loaded list as missing.
     if self._unmapped is None:
         raise m2pException("DatasetsRetriever: error obtaining unloaded list of unmapped markers. Call a retrieve method first.")
     
     return self._unmapped
Ejemplo n.º 14
0
 def output_features_header(self, map_as_physical, map_has_cm_pos,
                            map_has_bp_pos, multiple_param,
                            load_annot=False, annotator=None):
     """Placeholder that always raises m2pException.

     The message states that a child class inheriting from OutputPrinter
     has to implement this method; none of the arguments is used here.
     """
     not_implemented_msg = "Method has to be implemented in child class inheriting from OutputPrinter"
     raise m2pException(not_implemented_msg)
Ejemplo n.º 15
0
def __hs_blast(hsblastn_app_path, n_threads, query_fasta_path, hsblastn_dbs_path, db_name, verbose = False):
    """Run HS-Blastn and return its tabular output as a list of lines.

    Before running, checks that the file <hsblastn_dbs_path><db_name>.bwt
    exists (the two are concatenated directly; hsblastn_dbs_path presumably
    ends with a path separator - TODO confirm). The aligner is run with
    "-outfmt 6" and "-dust no".

    Returns:
        The non-empty lines of the captured stdout, or an empty list when
        the output contains "error", "Error" or "ERROR".

    Raises:
        m2pException: when the database file is not found.
        Exception: when HS-Blastn exits with a non-zero return code.
    """
    # CPCantalapiedra 201701
    ###### Check that DB is available for this aligner
    dbpath = hsblastn_dbs_path + db_name
    dbpathfile = dbpath + ".bwt"
    sys.stderr.write("Checking database: "+dbpath+" DB exists for "+ALIGNER+".\n")
    
    # isfile() is already False for a missing path, so one check suffices.
    if not os.path.isfile(dbpathfile):
        raise m2pException("DB path "+dbpath+" for "+ALIGNER+" aligner NOT FOUND.")
    
    ###### HS-Blastn
    blast_command = " ".join([hsblastn_app_path, " align ",
                              " -num_threads ", str(n_threads),
                              "-dust no ",
                              '-outfmt 6'])
    
    blast_db = "".join(["-db ", dbpath])
    blast_query = " ".join(["-query ", query_fasta_path])
    blast_cmd = " ".join([blast_command, blast_db, blast_query])
    
    script_name = os.path.basename(__file__)
    
    if verbose: sys.stderr.write(script_name+": Running '"+blast_cmd+"'\n")
    
    # In verbose mode the aligner's stderr goes straight to ours; otherwise
    # it is captured so it can be included in the error message below.
    # NOTE(review): the command is a shell string (shell=True); paths with
    # spaces or shell metacharacters are not quoted.
    err_target = sys.stderr if verbose else PIPE
    p = Popen(blast_cmd, shell=True, stdout=PIPE, stderr=err_target)
    
    output, output_err = p.communicate()
    retValue = p.returncode
    
    if retValue != 0:
        if verbose:
            raise Exception(script_name+": HS-Blastn return != 0. "+blast_cmd+"\n"+str(output)+"\n")
        else:
            raise Exception(script_name+": HS-Blastn return != 0. "+blast_cmd+"\n"+str(output)+"\n"+str(output_err)+"\n")
    
    # A zero exit code with an error message in the output is treated as
    # "no results" rather than as a failure.
    if "error" in output or "Error" in output or "ERROR" in output:
        sys.stderr.write("m2p_hs_blast: error in hs-blastn output. We will report 0 results for this alignment.\n")
        sys.stderr.write(output+"\n")
        sys.stderr.write(str(output_err)+"\n")
        return []
    
    if verbose: sys.stderr.write(script_name+": HS-Blastn return value "+str(retValue)+"\n")
    
    return [line for line in output.strip().split("\n") if line != ""]
Ejemplo n.º 16
0
def __hs_blast(hsblastn_app_path, n_threads, query_fasta_path, hsblastn_dbs_path, db_name, verbose = False):
    """Align query_fasta_path with HS-Blastn and return the output lines.

    The database is expected at <hsblastn_dbs_path><db_name>; its ".bwt"
    file must exist (the path parts are joined without a separator, so
    hsblastn_dbs_path presumably ends with one - TODO confirm). Output is
    requested in tabular format ("-outfmt 6") with dust filtering off.

    Returns:
        A list with every non-empty line of the aligner's stdout, or an
        empty list when that output contains "error", "Error" or "ERROR".

    Raises:
        m2pException: when the database ".bwt" file is missing.
        Exception: when the aligner returns a non-zero exit code.
    """
    # CPCantalapiedra 201701
    ###### Check that DB is available for this aligner
    dbpath = hsblastn_dbs_path + db_name
    dbpathfile = dbpath + ".bwt"
    sys.stderr.write("Checking database: "+dbpath+" DB exists for "+ALIGNER+".\n")
    
    # os.path.isfile() returns False for non-existent paths as well.
    if not os.path.isfile(dbpathfile):
        raise m2pException("DB path "+dbpath+" for "+ALIGNER+" aligner NOT FOUND.")
    
    ###### HS-Blastn
    blast_command = " ".join([hsblastn_app_path, " align ",
                              " -num_threads ", str(n_threads),
                              "-dust no ",
                              '-outfmt 6'])
    blast_db = "".join(["-db ", dbpath])
    blast_query = " ".join(["-query ", query_fasta_path])
    blast_cmd = " ".join([blast_command, blast_db, blast_query])
    
    this_script = os.path.basename(__file__)
    
    if verbose: sys.stderr.write(this_script+": Running '"+blast_cmd+"'\n")
    
    # stderr is forwarded when verbose and captured otherwise; no devnull
    # handle is needed (one used to be opened here and never closed).
    # NOTE(review): shell=True with an unquoted command string.
    if verbose:
        stderr_target = sys.stderr
    else:
        stderr_target = PIPE
    p = Popen(blast_cmd, shell=True, stdout=PIPE, stderr=stderr_target)
    
    com_list = p.communicate()
    output = com_list[0]
    output_err = com_list[1]
    retValue = p.returncode
    
    if retValue != 0:
        if verbose:
            raise Exception(this_script+": HS-Blastn return != 0. "+blast_cmd+"\n"+str(output)+"\n")
        else:
            raise Exception(this_script+": HS-Blastn return != 0. "+blast_cmd+"\n"+str(output)+"\n"+str(output_err)+"\n")
    
    # Successful exit code but an error text in stdout: report no results.
    if "error" in output or "Error" in output or "ERROR" in output:
        sys.stderr.write("m2p_hs_blast: error in hs-blastn output. We will report 0 results for this alignment.\n")
        sys.stderr.write(output+"\n")
        sys.stderr.write(str(output_err)+"\n")
        return []
    
    if verbose: sys.stderr.write(this_script+": HS-Blastn return value "+str(retValue)+"\n")
    
    # Build the list directly instead of appending from a throwaway
    # list comprehension.
    results = [line for line in output.strip().split("\n") if line != ""]
    
    return results
Ejemplo n.º 17
0
    def get_search_engine(search_type,
                          maps_path,
                          best_score_param,
                          databases_config,
                          aligner_list,
                          threshold_id,
                          threshold_cov,
                          n_threads,
                          verbose=False):
        """Create the search engine that corresponds to search_type.

        Greedy and exhaustive searches use the best-score alignment type
        when best_score_param is truthy and the greedy alignment type
        otherwise; hierarchical searches always use the hierarchical
        alignment type.

        Returns:
            A SearchEngineGreedy or SearchEngineExhaustive instance.

        Raises:
            m2pException: when search_type is not a recognised search type.
        """
        # Alignment type shared by the greedy and exhaustive branches.
        if best_score_param:
            scored_alignment_type = ALIGNMENT_TYPE_BEST_SCORE
        else:
            scored_alignment_type = ALIGNMENT_TYPE_GREEDY

        if search_type == MapsConfig.SEARCH_TYPE_GREEDY:
            engine_class = SearchEngineGreedy
            alignment_type = scored_alignment_type

        elif search_type == MapsConfig.SEARCH_TYPE_HIERARCHICAL:
            # NOTE(review): hierarchical searches are built with
            # SearchEngineGreedy, exactly as before - confirm this is
            # intended.
            engine_class = SearchEngineGreedy
            alignment_type = ALIGNMENT_TYPE_HIERARCHICAL

        elif search_type == MapsConfig.SEARCH_TYPE_EXHAUSTIVE:
            engine_class = SearchEngineExhaustive
            alignment_type = scored_alignment_type

        else:
            # str() so a non-string search_type still raises m2pException
            # rather than a TypeError from the concatenation.
            raise m2pException("Unrecognized search type " +
                               str(search_type) + ".")

        # Single construction point: every engine takes the same arguments.
        return engine_class(maps_path, best_score_param, databases_config,
                            aligner_list, threshold_id, threshold_cov,
                            n_threads, alignment_type, verbose)
Ejemplo n.º 18
0
 def get_map_enricher(show_how, enricher, mapping_results, verbose):
     """Return the map enricher that corresponds to show_how.

     SHOW_ON_INTERVALS yields a MapEnricher and SHOW_ON_MARKERS a
     MarkerEnricher, both built from (enricher, mapping_results, verbose).

     Raises:
         m2pException: for any other show_how value.
     """
     if show_how == SHOW_ON_INTERVALS:
         return MapEnricher(enricher, mapping_results, verbose)
     
     if show_how == SHOW_ON_MARKERS:
         return MarkerEnricher(enricher, mapping_results, verbose)
     
     raise m2pException("Unrecognized show_how parameter "+str(show_how)+".")
Ejemplo n.º 19
0
 def get_enricher_factory(show_how):
     """Return the enricher factory that corresponds to show_how.

     SHOW_ON_INTERVALS yields an EnricherFactory and SHOW_ON_MARKERS a
     MarkerEnricherFactory.

     Raises:
         m2pException: for any other show_how value.
     """
     if show_how == SHOW_ON_INTERVALS:
         return EnricherFactory()
     
     if show_how == SHOW_ON_MARKERS:
         return MarkerEnricherFactory()
     
     raise m2pException("Unrecognized show_how parameter "+str(show_how)+".")
Ejemplo n.º 20
0
 def load_synonyms(self, synonyms):
     """Load a synonyms file into a dict keyed by the first token of each line.

     Args:
         synonyms: path of the synonyms file. An empty string or
             DatasetsConfig.SYNONYMS_NO means there is no file to load.

     Returns:
         A dict mapping the first whitespace-separated token of every
         non-blank line to the full list of tokens of that line (the key
         included). Empty when there is no file to load.

     Raises:
         m2pException: when the same first token appears on two lines.
     """
     dataset_synonyms = {}
     
     if synonyms == "" or synonyms == DatasetsConfig.SYNONYMS_NO:
         return dataset_synonyms
     
     # The context manager closes the handle even if a repeated entry
     # aborts the loop; previously the file was never closed explicitly.
     with open(synonyms, 'r') as syn_handle:
         for syn_line in syn_handle:
             syn_data = syn_line.strip().split()
             if not syn_data:
                 # Blank line (e.g. trailing newline): nothing to index.
                 # Used to fail with IndexError on syn_data[0].
                 continue
             
             syn_key = syn_data[0]
             if syn_key in dataset_synonyms:
                 raise m2pException("Repeated synonyms entry for marker "+syn_key+".")
             
             dataset_synonyms[syn_key] = syn_data
     
     return dataset_synonyms
Ejemplo n.º 21
0
 def output_results(self, aligned, databases_ids = None):
     """Print the aligned records of each database in databases_ids.

     For every database id, a ">" line with the database name (looked up
     in self._databases_config) is written to stdout, followed by the
     output of self.print_header() and self.print_records_db().

     Raises:
         m2pException: when databases_ids is empty or None.
     """
     if not databases_ids:
         raise m2pException("AlignmentsGreedyPrinter needs a list of DBs.")
     
     for db_id in databases_ids:
         db_label = self._databases_config.get_database_name(db_id)
         
         # One ">" banner line per database, then header and records.
         banner = "".join([">", str(db_label), "\n"])
         sys.stdout.write(banner)
         self.print_header()
         self.print_records_db(aligned, db_id, db_label)
     
     return
Ejemplo n.º 22
0
def __process_id(gtf_data, feature_type, file_type):
    """Extract the transcript or gene identifier from a row of GTF fields.

    Args:
        gtf_data: indexable row; the identifier is read from
            GTF_TRANSCRIPT_ID_COL or GTF_GENE_ID_COL.
        feature_type: GTF_TYPE_TRANSCRIPT or GTF_TYPE_GENE.
        file_type: FILE_TYPE_GTF (the only supported type).

    Returns:
        The identifier with every '"' and ';' character removed.

    Raises:
        m2pException: for an unrecognized feature_type, for the
            unsupported GFF3 file type, or for an unrecognized file_type.
    """
    if feature_type == GTF_TYPE_TRANSCRIPT:
        new_id = gtf_data[GTF_TRANSCRIPT_ID_COL]
        
    elif feature_type == GTF_TYPE_GENE:
        new_id = gtf_data[GTF_GENE_ID_COL]
    else:
        raise m2pException("Unrecognized GTF type "+str(feature_type)+".")
    
    if file_type == FILE_TYPE_GTF:
        # Remove " and ; from the string. replace() behaves the same on
        # byte and unicode strings, whereas translate(None, chars) only
        # exists for Python 2 byte strings.
        return new_id.replace('"', '').replace(';', '')
    
    if file_type == FILE_TYPE_GFF3:
        raise m2pException("GFF3 file type is not supported yet.")
    
    # str() so a non-string file_type still raises m2pException.
    raise m2pException("Unrecognized file type "+str(file_type)+".")

## END
Ejemplo n.º 23
0
 def get_alignments_printer(search_type, databases_config):
     """Create the alignments printer that corresponds to search_type.

     Returns:
         An AlignmentsGreedyPrinter, AlignmentsHierarchicalPrinter or
         AlignmentsBestScorePrinter built with databases_config.

     Raises:
         m2pException: when search_type is not one of the three alignment
             types handled here.
     """
     if search_type == ALIGNMENT_TYPE_GREEDY:
         printer_class = AlignmentsGreedyPrinter
     elif search_type == ALIGNMENT_TYPE_HIERARCHICAL:
         printer_class = AlignmentsHierarchicalPrinter
     elif search_type == ALIGNMENT_TYPE_BEST_SCORE:
         printer_class = AlignmentsBestScorePrinter
     else:
         # str() so a non-string search_type still raises m2pException
         # rather than a TypeError from the concatenation.
         raise m2pException("Unrecognized search type "+str(search_type)+".")
     
     return printer_class(databases_config)
Ejemplo n.º 24
0
def __process_id(gtf_data, feature_type, file_type):
    """Return the cleaned transcript or gene id found in a row of GTF fields.

    The raw id comes from gtf_data[GTF_TRANSCRIPT_ID_COL] for transcripts
    and gtf_data[GTF_GENE_ID_COL] for genes. For GTF files the double-quote
    and semicolon characters are stripped from it.

    Raises:
        m2pException: when feature_type is not a known GTF type, when
            file_type is GFF3 (not supported yet) or when file_type is
            not recognized.
    """
    if feature_type == GTF_TYPE_TRANSCRIPT:
        new_id = gtf_data[GTF_TRANSCRIPT_ID_COL]

    elif feature_type == GTF_TYPE_GENE:
        new_id = gtf_data[GTF_GENE_ID_COL]
    else:
        raise m2pException("Unrecognized GTF type " + str(feature_type) + ".")

    if file_type == FILE_TYPE_GTF:
        # Remove " and ; from the string. Chained replace() calls work on
        # both Python 2 and 3 strings; translate(None, ...) is limited to
        # Python 2 byte strings.
        for unwanted in ('"', ';'):
            new_id = new_id.replace(unwanted, '')

    elif file_type == FILE_TYPE_GFF3:
        raise m2pException("GFF3 file type is not supported yet.")
    else:
        # str() keeps the intended exception for non-string file types.
        raise m2pException("Unrecognized file type " + str(file_type) + ".")

    return new_id


## END
Ejemplo n.º 25
0
def __run_command(cmd):
    """Run cmd through the shell, logging the command and its exit code.

    The command line and, on success, the return value are written to
    stderr prefixed with _SCRIPT.

    Raises:
        m2pException: when the command exits with a non-zero return code.
    """
    sys.stderr.write(_SCRIPT+": running command:\n")
    sys.stderr.write("\t"+cmd+"\n")
    
    # No pipes are attached, so communicate() just waits for completion.
    process = Popen(cmd, shell=True)
    process.communicate()
    exit_code = process.returncode
    
    if exit_code != 0:
        raise m2pException(_SCRIPT+": return != 0. "+cmd+"\n")
    
    sys.stderr.write(_SCRIPT+": return value "+str(exit_code)+"\n")
    
    return
Ejemplo n.º 26
0
    def output_results(self, aligned, databases_ids=None):
        """Write the aligned records database by database.

        Each database id in databases_ids produces a ">" line carrying the
        name returned by self._databases_config.get_database_name(), then
        self.print_header() and self.print_records_db() are called for it.

        Raises:
            m2pException: when databases_ids is empty or None.
        """
        if not databases_ids:
            raise m2pException("AlignmentsGreedyPrinter needs a list of DBs.")

        for db_id in databases_ids:
            db_label = self._databases_config.get_database_name(db_id)

            # Banner first, then the header and the records of this DB.
            title_line = "".join([">", str(db_label), "\n"])
            sys.stdout.write(title_line)
            self.print_header()
            self.print_records_db(aligned, db_id, db_label)

        return
Ejemplo n.º 27
0
    def get_enricher_factory(show_how):
        """Pick the enricher factory for the given show_how mode.

        Returns an EnricherFactory for SHOW_ON_INTERVALS and a
        MarkerEnricherFactory for SHOW_ON_MARKERS.

        Raises:
            m2pException: when show_how is neither of those modes.
        """
        if show_how == SHOW_ON_INTERVALS:
            return EnricherFactory()

        if show_how == SHOW_ON_MARKERS:
            return MarkerEnricherFactory()

        raise m2pException("Unrecognized show_how parameter " +
                           str(show_how) + ".")
Ejemplo n.º 28
0
    def get_map_enricher(show_how, enricher, mapping_results, verbose):
        """Pick the map enricher for the given show_how mode.

        Returns a MapEnricher for SHOW_ON_INTERVALS and a MarkerEnricher for
        SHOW_ON_MARKERS, each built from (enricher, mapping_results,
        verbose).

        Raises:
            m2pException: when show_how is neither of those modes.
        """
        if show_how == SHOW_ON_INTERVALS:
            return MapEnricher(enricher, mapping_results, verbose)

        if show_how == SHOW_ON_MARKERS:
            return MarkerEnricher(enricher, mapping_results, verbose)

        raise m2pException("Unrecognized show_how parameter " +
                           str(show_how) + ".")
Ejemplo n.º 29
0
    def get_alignments_printer(search_type, databases_config):
        """Create the alignments printer that corresponds to search_type.

        Returns:
            An AlignmentsGreedyPrinter, AlignmentsHierarchicalPrinter or
            AlignmentsBestScorePrinter built with databases_config.

        Raises:
            m2pException: when search_type is not one of the three
                alignment types handled here.
        """
        if search_type == ALIGNMENT_TYPE_GREEDY:
            return AlignmentsGreedyPrinter(databases_config)

        if search_type == ALIGNMENT_TYPE_HIERARCHICAL:
            return AlignmentsHierarchicalPrinter(databases_config)

        if search_type == ALIGNMENT_TYPE_BEST_SCORE:
            return AlignmentsBestScorePrinter(databases_config)

        # str() so a non-string search_type still raises m2pException
        # rather than a TypeError from the concatenation.
        raise m2pException("Unrecognized search type " + str(search_type) +
                           ".")
Ejemplo n.º 30
0
def __run_command(cmd):
    """Execute cmd via the shell and fail loudly on a non-zero exit code.

    The command and, when it succeeds, its return value are logged to
    stderr with the _SCRIPT prefix.

    Raises:
        m2pException: when the command's return code is not 0.
    """
    sys.stderr.write(_SCRIPT + ": running command:\n")
    sys.stderr.write("\t" + cmd + "\n")

    # Output is inherited from this process; communicate() only waits.
    child = Popen(cmd, shell=True)
    child.communicate()
    status = child.returncode

    if status != 0:
        raise m2pException(_SCRIPT + ": return != 0. " + cmd + "\n")

    sys.stderr.write(_SCRIPT + ": return value " + str(status) + "\n")

    return
Ejemplo n.º 31
0
 def get_empty_feature(feature_type):
     """Return the empty mapping object for the given feature type.

     The object comes from the get_empty() method of MarkerMapping,
     GeneMapping or AnchoredMapping, depending on feature_type.

     Raises:
         m2pException: when feature_type is none of the three dataset
             types handled here.
     """
     if feature_type == DatasetsConfig.DATASET_TYPE_GENETIC_MARKER:
         return MarkerMapping.get_empty()
     
     if feature_type == DatasetsConfig.DATASET_TYPE_GENE:
         return GeneMapping.get_empty()
     
     if feature_type == DatasetsConfig.DATASET_TYPE_ANCHORED:
         return AnchoredMapping.get_empty()
     
     raise m2pException("Unrecognized feature type "+str(feature_type)+".")
Ejemplo n.º 32
0
    def get_empty_feature(feature_type):
        """Build the empty placeholder mapping for feature_type.

        Delegates to get_empty() on MarkerMapping (genetic markers),
        GeneMapping (genes) or AnchoredMapping (anchored features).

        Raises:
            m2pException: when feature_type matches none of those types.
        """
        if feature_type == DatasetsConfig.DATASET_TYPE_GENETIC_MARKER:
            return MarkerMapping.get_empty()

        if feature_type == DatasetsConfig.DATASET_TYPE_GENE:
            return GeneMapping.get_empty()

        if feature_type == DatasetsConfig.DATASET_TYPE_ANCHORED:
            return AnchoredMapping.get_empty()

        raise m2pException("Unrecognized feature type " +
                           str(feature_type) + ".")
Ejemplo n.º 33
0
def load_conf(conf_file, verbose = False):
    """Read a space-delimited configuration file into a list of rows.

    Lines whose first character is "#" and lines that are empty (or contain
    only whitespace) are skipped. Every other line is stripped and split on
    single spaces, so two consecutive spaces produce an empty field.

    Args:
        conf_file: path of the configuration file.
        verbose: when true, progress and every accepted line are written
            to stderr.

    Returns:
        A list with one list of string fields per accepted line, in file
        order.

    Raises:
        m2pException: if the file cannot be opened or read.
    """
    conf_rows = []

    if verbose: sys.stderr.write("Loading configuration file "+conf_file+"...\n")

    try:
        # The context manager guarantees the handle is closed, also when
        # reading fails half-way through the file.
        with open(conf_file, 'r') as conf_handle:
            for line in conf_handle:
                stripped = line.strip()

                # "#" only marks a comment in the first column; an empty
                # 'stripped' means a blank line.
                if line.startswith("#") or not stripped:
                    continue

                if verbose: sys.stderr.write("\t conf line: "+stripped+"\n")

                conf_rows.append(stripped.split(" "))

    except Exception:
        raise m2pException("Error loading configuration file "+conf_file+".")

    return conf_rows

## END
Ejemplo n.º 34
0
 def get_alignment_engine(search_type, aligner_list, paths_config, ref_type_param, n_threads, verbose):
     """Create the alignment engine that implements ``search_type``.

     Args:
         search_type: one of ALIGNMENT_TYPE_GREEDY,
             ALIGNMENT_TYPE_HIERARCHICAL or ALIGNMENT_TYPE_BEST_SCORE.
         aligner_list, paths_config, ref_type_param, n_threads, verbose:
             forwarded unchanged, in this order, to the engine constructor.

     Returns:
         A GreedyEngine, HierarchicalEngine or BestScoreEngine instance.

     Raises:
         m2pException: if ``search_type`` is not one of the known types.
     """
     # All engines share the same constructor arguments, so only the class
     # depends on the search type.
     if search_type == ALIGNMENT_TYPE_GREEDY:
         engine_class = GreedyEngine
     elif search_type == ALIGNMENT_TYPE_HIERARCHICAL:
         engine_class = HierarchicalEngine
     elif search_type == ALIGNMENT_TYPE_BEST_SCORE:
         engine_class = BestScoreEngine
     else:
         raise m2pException("Unrecognized search type "+search_type+".")

     return engine_class(aligner_list, paths_config, ref_type_param, n_threads, verbose)
Ejemplo n.º 35
0
    def _map_intervals(self, sorted_map, map_sort_by, extend_window):
        """Create one interval for every position of ``sorted_map``.

        Positions are not compared with each other: each one gets its own
        interval from ``_get_new_interval`` and that interval is handed to
        ``_append_interval`` together with the result list.

        As a side effect ``self.MAP_UNIT`` is set to the physical or the
        genetic unit, depending on ``map_sort_by``.

        Args:
            sorted_map: iterable of map positions, visited in order.
            map_sort_by: ``MapTypes.MAP_SORT_PARAM_BP`` or
                ``MapTypes.MAP_SORT_PARAM_CM``.
            extend_window: forwarded to ``_get_new_interval``.

        Returns:
            The list that was passed to ``_append_interval`` for every
            interval.

        Raises:
            m2pException: if ``map_sort_by`` is not a recognized sort unit.
        """
        map_intervals = []

        if self._verbose:
            sys.stderr.write("MarkerEnricher: creating intervals on markers\n")

        sys.stderr.write("MarkerEnricher: map sort by " + str(map_sort_by) +
                         "\n")

        if map_sort_by == MapTypes.MAP_SORT_PARAM_BP:
            self.MAP_UNIT = self.MAP_UNIT_PHYSICAL
        elif map_sort_by == MapTypes.MAP_SORT_PARAM_CM:
            self.MAP_UNIT = self.MAP_UNIT_GENETIC
        else:
            raise m2pException("Unrecognized map sort unit " +
                               str(map_sort_by) + ".")

        # One interval per map position; no merging takes place here.
        for map_position in sorted_map:

            pos_chr = map_position.get_chrom_name()
            pos_pos = map_position.get_sort_pos(map_sort_by)
            pos_end_pos = map_position.get_sort_end_pos(map_sort_by)

            interval = self._get_new_interval(map_position, pos_chr, pos_pos,
                                              pos_end_pos, extend_window)

            self._append_interval(map_intervals, interval)

        # Same log prefix as the other messages written by this method.
        sys.stderr.write("MarkerEnricher: " + str(len(map_intervals)) +
                         " intervals created.\n")

        return map_intervals
Ejemplo n.º 36
0
    def _load_config(self, config_file):
        """Fill the dataset registry of this object from ``config_file``.

        Every row returned by ``load_conf`` becomes a ``DatasetConfig`` that
        is stored in ``self._config_dict`` under its dataset id; the ids are
        also recorded, in file order, in ``self._config_list``. Both
        containers are reset before loading.

        A dataset whose name starts with ">" has that character removed from
        its name and is flagged to be ignored by the build datasets script.

        Args:
            config_file: path of the datasets configuration file.

        Raises:
            m2pException: if the same dataset id appears more than once.
        """
        self._config_dict = {}
        self._config_list = []

        for fields in load_conf(config_file, self._verbose):

            name = fields[DatasetsConfig.DATASET_NAME]
            ident = fields[DatasetsConfig.DATASET_ID]
            kind = fields[DatasetsConfig.DATASET_TYPE]
            path = fields[DatasetsConfig.FILE_PATH]
            file_kind = fields[DatasetsConfig.FILE_TYPE]
            # Databases and prefixes are comma-separated lists in the file.
            db_names = fields[DatasetsConfig.DATABASES].strip().split(",")
            synonyms = fields[DatasetsConfig.SYNONYMS]
            prefix_list = fields[DatasetsConfig.PREFIXES].strip().split(",")

            dataset = DatasetConfig(name, ident, kind, path, file_kind,
                                    db_names, synonyms, prefix_list)

            if name.startswith(">"):
                dataset.set_dataset_name(name[1:])
                dataset.set_ignore_build(True)

            if ident in self._config_dict:
                raise m2pException("Duplicated dataset " + ident +
                                   " in configuration file " + config_file +
                                   ".")

            self._config_dict[ident] = dataset
            self._config_list.append(ident)
Ejemplo n.º 37
0
 def get_aligner(aligner_list, n_threads, paths_config, verbose = False): # This is an AlignerFactory
     """Build the aligner, or the list aligner, described by ``aligner_list``.

     With a single name the matching aligner is returned directly. With
     several names each aligner is built on its own through
     ``AlignersFactory.get_aligner`` and the ones that could be built are
     wrapped in a ``ListAligner``; a name that fails with m2pException is
     reported on stderr and skipped.

     Args:
         aligner_list: list of aligner names.
         n_threads: forwarded to the aligner builders.
         paths_config: provides the temporary files path and is forwarded
             to the aligner builders.
         verbose: forwarded to the aligner builders.

     Returns:
         The requested aligner, or a ListAligner when several were requested.

     Raises:
         m2pException: if a single, unknown aligner name is requested.
     """
     tmp_files_dir = paths_config.get_tmp_files_path()

     # Single aligner: dispatch on its name.
     if len(aligner_list) <= 1:
         aligner_name = aligner_list[0]

         if aligner_name == ALIGNER_BLASTN:
             return AlignersFactory.get_aligner_blastn(paths_config, n_threads, verbose)

         if aligner_name == ALIGNER_GMAP:
             return AlignersFactory.get_aligner_gmap(paths_config, n_threads, verbose)

         if aligner_name == ALIGNER_HSBLASTN:
             return AlignersFactory.get_aligner_hsblastn(paths_config, n_threads, verbose)

         raise m2pException("Unknown aligner type "+str(aligner_name)+" when requesting aligner.")

     # Several aligners: build each one separately and keep those that work.
     built_aligners = []
     for aligner_name in aligner_list:
         try:
             built_aligners.append(
                 AlignersFactory.get_aligner([aligner_name], n_threads, paths_config, verbose))
         except m2pException:
             sys.stderr.write("WARNING: exception obtaining "+aligner_name+".\nSkipping to next aligner.\n")

     return ListAligner(built_aligners, tmp_files_dir)
Ejemplo n.º 38
0
 def get_feature(marker_id, dataset_id, dataset_name, feature_type, mapping_result):
     """Create the enrichment-row mapping object for ``feature_type``.

     Args:
         marker_id, dataset_id, dataset_name, mapping_result: forwarded
             unchanged to the mapping constructor.
         feature_type: one of the ``DatasetsConfig.DATASET_TYPE_*`` values
             handled below; it selects the mapping class and is also
             forwarded to its constructor.

     Returns:
         A MarkerMapping, GeneMapping (created with an empty ``annots``
         list) or AnchoredMapping, built with
         ``FeatureMapping.ROW_TYPE_ENRICHMENT`` as row type.

     Raises:
         m2pException: if ``feature_type`` is not one of the handled types.
     """
     if feature_type == DatasetsConfig.DATASET_TYPE_GENETIC_MARKER:
         return MarkerMapping(marker_id, dataset_id, dataset_name, feature_type,
                              mapping_result, FeatureMapping.ROW_TYPE_ENRICHMENT)

     if feature_type == DatasetsConfig.DATASET_TYPE_GENE:
         # Genes additionally carry an annotation list, which starts empty.
         return GeneMapping(marker_id, dataset_id, dataset_name, feature_type,
                            mapping_result, FeatureMapping.ROW_TYPE_ENRICHMENT,
                            annots = [])

     if feature_type == DatasetsConfig.DATASET_TYPE_ANCHORED:
         return AnchoredMapping(marker_id, dataset_id, dataset_name, feature_type,
                                mapping_result, FeatureMapping.ROW_TYPE_ENRICHMENT)

     raise m2pException("Unrecognized feature type "+str(feature_type)+".")
Ejemplo n.º 39
0
def __gmap(gmap_app_path,
           n_threads,
           threshold_id,
           threshold_cov,
           query_fasta_path,
           gmap_dbs_path,
           db_name,
           verbose=False):
    """Run GMAP for ``query_fasta_path`` against ``db_name``.

    The database is considered available when the file
    ``<gmap_dbs_path>/<db_name>/<db_name>.ref153positions`` exists. The
    GMAP standard output is passed to ``__compress`` together with the
    database name, and the result of that call is returned.

    Args:
        gmap_app_path: GMAP executable (start of the command line).
        n_threads: value for GMAP's ``-t`` option.
        threshold_id: minimum identity as a percentage; it is divided by
            100 before being passed as ``--min-identity``.
        threshold_cov: minimum coverage as a percentage; it is divided by
            100 before being passed as ``--min-trimmed-coverage``.
        query_fasta_path: FASTA file with the query sequences.
        gmap_dbs_path: directory holding the GMAP databases (``-D``).
        db_name: name of the database (``-d``).
        verbose: when true, the command, the thresholds and the return
            value are logged to stderr, and GMAP's own stderr is streamed
            to ours instead of being captured.

    Returns:
        Whatever ``__compress(output, db_name)`` returns.

    Raises:
        m2pException: if the database file is not found.
        Exception: if GMAP exits with a non-zero status.
    """
    # CPCantalapiedra 201701
    ###### Check that DB is available for this aligner
    dbpath = gmap_dbs_path + "/" + db_name
    dbpathfile = dbpath + "/" + db_name + ".ref153positions"
    sys.stderr.write("Checking database: " + dbpath + " DB exists for " +
                     ALIGNER + ".\n")

    # isfile() is already False for a missing path, so it also covers the
    # existence check.
    if not os.path.isfile(dbpathfile):
        raise m2pException("DB path " + dbpath + " for " + ALIGNER +
                           " aligner NOT FOUND.")

    # GMAP
    command = "".join([gmap_app_path,
                       " -t ", str(n_threads),
                       " -B 0 -n ", str(MAX_NUMBER_PATHS_PER_QUERY)])

    gmap_thres_id = float(threshold_id) / 100.0
    gmap_thres_cov = float(threshold_cov) / 100.0

    if verbose:
        sys.stderr.write("m2p_gmap: Thresholds: ID=" + str(gmap_thres_id) +
                         "; COV=" + str(gmap_thres_cov) + "\n")

    filter_id = "--min-identity=" + str(gmap_thres_id)
    filter_cov = "--min-trimmed-coverage=" + str(gmap_thres_cov)
    db_option = "".join([" -d ", db_name])
    db_dir_option = "".join([" -D ", gmap_dbs_path])

    gmap_cmd = " ".join([
        command, filter_id, filter_cov, db_option, db_dir_option,
        query_fasta_path
    ])

    if verbose: sys.stderr.write("m2p_gmap: Executing '" + gmap_cmd + "'\n")

    # NOTE(review): the command is a shell string assembled from unquoted
    # paths and names; confirm these values always come from trusted
    # configuration.
    # In verbose mode GMAP's stderr goes straight to ours (communicate()
    # then returns None for it); otherwise it is captured so that it can be
    # included in the error message.
    stderr_target = sys.stderr if verbose else PIPE
    p = Popen(gmap_cmd, shell=True, stdout=PIPE, stderr=stderr_target)

    output, output_err = p.communicate()
    retValue = p.returncode

    if retValue != 0:
        if verbose:
            raise Exception("m2p_gmap: return != 0. " + gmap_cmd + "\n")
        else:
            raise Exception("m2p_gmap: return != 0. " + gmap_cmd +
                            "\nError: " + str(output_err) + "\n")

    if verbose:
        sys.stderr.write("m2p_gmap: GMAP return value " + str(retValue) +
                         "\n" + str(output_err) + "\n")

    return __compress(output, db_name)
Ejemplo n.º 40
0
    def _map_intervals(self, sorted_map, map_sort_by, extend_window):
        """Turn the positions of ``sorted_map`` into intervals, merging overlaps.

        Every position yields a candidate interval (``_get_new_interval``).
        When the candidate is on the same chromosome as the previous
        position and ``MapInterval.intervals_overlap`` reports an overlap
        with the interval currently being built, the position is added to
        that interval; otherwise the interval being built is handed to
        ``_append_interval`` and the candidate becomes the new one. The
        last interval is handed over after the loop.

        As a side effect ``self.MAP_UNIT`` is set to the physical or the
        genetic unit, depending on ``map_sort_by``.

        Args:
            sorted_map: iterable of map positions, visited in order.
            map_sort_by: ``MapTypes.MAP_SORT_PARAM_BP`` or
                ``MapTypes.MAP_SORT_PARAM_CM``.
            extend_window: forwarded to ``_get_new_interval`` and
                ``_add_position_to_interval``.

        Returns:
            The list that was passed to ``_append_interval`` for every
            finished interval.

        Raises:
            m2pException: if ``map_sort_by`` is not a recognized sort unit.
        """
        map_intervals = []
        log = sys.stderr.write

        if self._verbose:
            log("MapEnricher: creating intervals around markers\n")

        log("MapEnricher: map sort by " + str(map_sort_by) +
            ", extend interval " + str(extend_window) + "\n")

        if map_sort_by == MapTypes.MAP_SORT_PARAM_BP:
            self.MAP_UNIT = self.MAP_UNIT_PHYSICAL
        elif map_sort_by == MapTypes.MAP_SORT_PARAM_CM:
            self.MAP_UNIT = self.MAP_UNIT_GENETIC
        else:
            raise m2pException("Unrecognized map sort unit " +
                               str(map_sort_by) + ".")

        last_position = None
        open_interval = None  # interval that may still absorb positions
        for map_position in sorted_map:

            # The marker id is not used below; the lookup is kept so that
            # the sequence of calls on the position stays the same.
            map_position.get_marker_id()
            chrom = map_position.get_chrom_name()
            start = map_position.get_sort_pos(map_sort_by)
            end = map_position.get_sort_end_pos(map_sort_by)

            candidate = self._get_new_interval(map_position, chrom, start,
                                               end, extend_window)

            # Nothing to compare against for the first position.
            if last_position:
                changed_chrom = chrom != last_position.get_chrom_name()

                # The overlap is only evaluated within one chromosome.
                if changed_chrom or not MapInterval.intervals_overlap(
                        open_interval, candidate):
                    self._append_interval(map_intervals, open_interval)
                else:
                    self._add_position_to_interval(open_interval,
                                                   map_position, end,
                                                   extend_window)
                    candidate = open_interval

            last_position = map_position
            open_interval = candidate

        # Flush the interval that was still open when the map ended.
        if open_interval:
            self._append_interval(map_intervals, open_interval)

        log("MapEnricher: " + str(len(map_intervals)) +
            " intervals created.\n")

        return map_intervals
Ejemplo n.º 41
0
def __split_blast(split_blast_path, blast_app_path, n_threads, query_fasta_path, blast_dbs_path, db_name, verbose = False):
    """Run megablast through split_blast.pl and return its tabular hit lines.

    The database is considered available when ``<blast_dbs_path><db_name>``
    has a ".nsq" or a ".nal" file. The number of bins given to
    split_blast.pl is one per 50 query sequences plus one; when the
    sequences cannot be counted, 100 bins are used.

    Args:
        split_blast_path: directory prefix of split_blast.pl (concatenated
            as is, so it must end with a path separator).
        blast_app_path: blast executable handed to split_blast.pl.
        n_threads: number of threads handed to split_blast.pl.
        query_fasta_path: FASTA file with the query sequences.
        blast_dbs_path: prefix of the blast databases (concatenated as is
            with ``db_name``).
        db_name: name of the database.
        verbose: when true, the command and the return value are logged to
            stderr, and the stderr of the command is streamed to ours
            instead of being captured.

    Returns:
        The non-empty output lines that do not start with "#", or an empty
        list when the output contains "error", "Error" or "ERROR".

    Raises:
        m2pException: if the database files are not found.
        Exception: if the blast command exits with a non-zero status.
    """
    # CPCantalapiedra 201701
    ###### Check that DB is available for this aligner
    dbpath = blast_dbs_path + db_name
    dbpathfile = dbpath + ".nsq"
    dbpathfile2 = dbpath + ".nal"
    sys.stderr.write("Checking database: "+dbpath+" DB exists for "+ALIGNER+".\n")

    # isfile() is already False for a missing path, so it also covers the
    # existence check.
    if not (os.path.isfile(dbpathfile) or os.path.isfile(dbpathfile2)):
        raise m2pException("DB path "+dbpath+" for "+ALIGNER+" aligner NOT FOUND.")

    ###### Split blast bins
    ###### Retrieve num of fasta seqs to calculate necessary bins
    p = Popen(" ".join(["cat", query_fasta_path, " | grep -c \"^>\""]), \
              shell=True, stdout=PIPE, stderr=sys.stderr)
    output = p.communicate()[0]
    if p.returncode == 0:
        num_of_seqs = int(output)
        # Floor division: the bin count ends up on the command line and has
        # to be an integer regardless of the Python version.
        split_blast_bins = (num_of_seqs // 50) + 1
    else:
        # Fallback used whenever the counting pipeline exits non-zero.
        split_blast_bins = 100

    ###### Split blast
    blast_command = " ".join([split_blast_path+"split_blast.pl", str(n_threads), str(split_blast_bins), blast_app_path,
                "-dust no -soft_masking false -task megablast",
                '-outfmt \\"6 qseqid qlen sseqid slen length qstart qend sstart send bitscore evalue pident mismatch gapopen\\"'])

    blast_db = "".join(["-db ", dbpath])
    blast_query = " ".join(["-query ", query_fasta_path])
    blast_cmd = " ".join([blast_command, blast_db, blast_query])

    if verbose: sys.stderr.write("m2p_split_blast: Executing '"+blast_cmd+"'\n")

    # In verbose mode the command's stderr goes straight to ours
    # (communicate() then returns None for it); otherwise it is captured so
    # that it can be reported on failure.
    stderr_target = sys.stderr if verbose else PIPE
    p = Popen(blast_cmd, shell=True, stdout=PIPE, stderr=stderr_target)

    output, output_err = p.communicate()
    retValue = p.returncode

    if retValue != 0:
        if verbose:
            raise Exception("m2p_split_blast: Blast return != 0. "+blast_cmd+"\n"+str(output)+"\n")
        else:
            raise Exception("m2p_split_blast: Blast return != 0. "+blast_cmd+"\n"+str(output)+"\n"+str(output_err)+"\n")

    # A zero exit status is not enough: error text in the output also
    # invalidates the run, which is then reported as having no results.
    if "error" in output or "Error" in output or "ERROR" in output:
        sys.stderr.write("m2p_split_blast: error in blast output. We will report 0 results for this alignment.\n")
        sys.stderr.write(output+"\n")
        sys.stderr.write(str(output_err)+"\n")
        return []

    if verbose: sys.stderr.write("m2p_split_blast: Blast return value "+str(retValue)+"\n")

    # startswith("#") due to split_blast.pl printing in stdout comments, warnings and so on
    return [line for line in output.strip().split("\n") if line != "" and not line.startswith("#")]
Ejemplo n.º 42
0
 def align(self, fasta_path, db, ref_type, threshold_id, threshold_cov):
     """Abstract hook: child classes of BaseAligner must override it.

     Raises:
         m2pException: always, since this base version has no implementation.
     """
     message = "BaseAligner is an abstract class. 'align' has to be implemented in child class."
     raise m2pException(message)
Ejemplo n.º 43
0
def __split_blast(split_blast_path,
                  blast_app_path,
                  n_threads,
                  query_fasta_path,
                  blast_dbs_path,
                  db_name,
                  verbose=False):
    """Run megablast through split_blast.pl and return its tabular hit lines.

    The database is considered available when ``<blast_dbs_path><db_name>``
    has a ".nsq" or a ".nal" file. The number of bins given to
    split_blast.pl is one per 50 query sequences plus one; when the
    sequences cannot be counted, 100 bins are used.

    Args:
        split_blast_path: directory prefix of split_blast.pl (concatenated
            as is, so it must end with a path separator).
        blast_app_path: blast executable handed to split_blast.pl.
        n_threads: number of threads handed to split_blast.pl.
        query_fasta_path: FASTA file with the query sequences.
        blast_dbs_path: prefix of the blast databases (concatenated as is
            with ``db_name``).
        db_name: name of the database.
        verbose: when true, the command and the return value are logged to
            stderr, and the stderr of the command is streamed to ours
            instead of being captured.

    Returns:
        The non-empty output lines that do not start with "#", or an empty
        list when the output contains "error", "Error" or "ERROR".

    Raises:
        m2pException: if the database files are not found.
        Exception: if the blast command exits with a non-zero status.
    """
    # CPCantalapiedra 201701
    ###### Check that DB is available for this aligner
    dbpath = blast_dbs_path + db_name
    dbpathfile = dbpath + ".nsq"
    dbpathfile2 = dbpath + ".nal"
    sys.stderr.write("Checking database: " + dbpath + " DB exists for " +
                     ALIGNER + ".\n")

    # isfile() is already False for a missing path, so it also covers the
    # existence check.
    if not (os.path.isfile(dbpathfile) or os.path.isfile(dbpathfile2)):
        raise m2pException("DB path " + dbpath + " for " + ALIGNER +
                           " aligner NOT FOUND.")

    ###### Split blast bins
    ###### Retrieve num of fasta seqs to calculate necessary bins
    p = Popen(" ".join(["cat", query_fasta_path, " | grep -c \"^>\""]),
              shell=True,
              stdout=PIPE,
              stderr=sys.stderr)
    output = p.communicate()[0]
    if p.returncode == 0:
        num_of_seqs = int(output)
        # Floor division: the bin count ends up on the command line and has
        # to be an integer regardless of the Python version.
        split_blast_bins = (num_of_seqs // 50) + 1
    else:
        # Fallback used whenever the counting pipeline exits non-zero.
        split_blast_bins = 100

    ###### Split blast
    blast_command = " ".join([
        split_blast_path + "split_blast.pl",
        str(n_threads),
        str(split_blast_bins),
        blast_app_path,
        "-dust no -soft_masking false -task megablast",
        '-outfmt \\"6 qseqid qlen sseqid slen length qstart qend sstart send bitscore evalue pident mismatch gapopen\\"'
    ])

    blast_db = "".join(["-db ", dbpath])
    blast_query = " ".join(["-query ", query_fasta_path])
    blast_cmd = " ".join([blast_command, blast_db, blast_query])

    if verbose:
        sys.stderr.write("m2p_split_blast: Executing '" + blast_cmd + "'\n")

    # In verbose mode the command's stderr goes straight to ours
    # (communicate() then returns None for it); otherwise it is captured so
    # that it can be reported on failure.
    stderr_target = sys.stderr if verbose else PIPE
    p = Popen(blast_cmd, shell=True, stdout=PIPE, stderr=stderr_target)

    output, output_err = p.communicate()
    retValue = p.returncode

    if retValue != 0:
        if verbose:
            raise Exception("m2p_split_blast: Blast return != 0. " +
                            blast_cmd + "\n" + str(output) + "\n")
        else:
            raise Exception("m2p_split_blast: Blast return != 0. " +
                            blast_cmd + "\n" + str(output) + "\n" +
                            str(output_err) + "\n")

    # A zero exit status is not enough: error text in the output also
    # invalidates the run, which is then reported as having no results.
    if "error" in output or "Error" in output or "ERROR" in output:
        sys.stderr.write(
            "m2p_split_blast: error in blast output. We will report 0 results for this alignment.\n"
        )
        sys.stderr.write(output + "\n")
        sys.stderr.write(str(output_err) + "\n")
        return []

    if verbose:
        sys.stderr.write("m2p_split_blast: Blast return value " +
                         str(retValue) + "\n")

    # startswith("#") due to split_blast.pl printing in stdout comments, warnings and so on
    return [
        line for line in output.strip().split("\n")
        if line != "" and not line.startswith("#")
    ]
Ejemplo n.º 44
0
 def output_features_pos(self, pos, map_as_physical, map_has_cm_pos, map_has_bp_pos, multiple_param, load_annot = False, annotator = None):
     """Abstract hook: classes inheriting from OutputPrinter must override it.

     Raises:
         m2pException: always, since this base version has no implementation.
     """
     message = "Method has to be implemented in child class inheriting from OutputPrinter"
     raise m2pException(message)
Ejemplo n.º 45
0
     sys.stdout.write(_SCRIPT+": dataset "+dataset_name+" with id "+dataset_id+" created.\n")
 
 ### 2) GTF FILES
 ###
 elif dataset_file_type == DatasetsConfig.FILE_TYPE_GTF:
     
     ### Create the new directory
     _create_dir(dataset_path)
     
     maps_conf_file = __app_path+ConfigBase.MAPS_CONF
     maps_config = MapsConfig(maps_conf_file, verbose = verbose_param)
     
     # align to all the maps
     if (len(dataset_db_list)==1) and (dataset_db_list[0] == DatasetsConfig.DATABASES_ANY):
         
         raise m2pException("GTF files have to be associated to a single database in datasets configuration.")
         
     # align to maps which are associated to databases also associated to this dataset
     else:
         
         #paths_conf_file = __app_path+"/"+PATHS_CONF
         #config_path_dict = read_paths(paths_conf_file) # data_utils.read_paths
         #__app_path = config_path_dict["app_path"]
         maps_path = paths_config.get_maps_path()
         
         parsed_gtf = False
         for map_id in maps_config.get_maps():
             
             map_config = maps_config.get_map_config(map_id)
             map_db_list = map_config.get_db_list()
             
Ejemplo n.º 46
0
 def create_map(self, query_path, query_sets_ids, map_config, facade, sort_param, multiple_param, tmp_files_dir = None):
     """Abstract hook: child classes must provide the implementation.

     Raises:
         m2pException: always, since this base version has no implementation.
     """
     message = "To be implemented in child classes."
     raise m2pException(message)
Ejemplo n.º 47
0
        ###
        elif dataset_file_type == DatasetsConfig.FILE_TYPE_GTF:

            ### Create the new directory
            _create_dir(dataset_path)

            maps_conf_file = __app_path + ConfigBase.MAPS_CONF
            maps_config = MapsConfig(maps_conf_file, verbose=verbose_param)

            # align to all the maps
            if (len(dataset_db_list)
                    == 1) and (dataset_db_list[0]
                               == DatasetsConfig.DATABASES_ANY):

                raise m2pException(
                    "GTF files have to be associated to a single database in datasets configuration."
                )

            # align to maps which are associated to databases also associated to this dataset
            else:

                #paths_conf_file = __app_path+"/"+PATHS_CONF
                #config_path_dict = read_paths(paths_conf_file) # data_utils.read_paths
                #__app_path = config_path_dict["app_path"]
                maps_path = paths_config.get_maps_path()

                parsed_gtf = False
                for map_id in maps_config.get_maps():

                    map_config = maps_config.get_map_config(map_id)
                    map_db_list = map_config.get_db_list()
Ejemplo n.º 48
0
 def perform_alignment(self, query_fasta_path, dbs_list, databases_config, threshold_id, threshold_cov):
     """Abstract hook: child classes of SearchEngine must override it.

     Raises:
         m2pException: always, since this base version has no implementation.
     """
     message = "SearchEngine is an abstract class. 'perform_alignment' must be implemented in a child class."
     raise m2pException(message)
Ejemplo n.º 49
0
 def perform_alignment(self, query_fasta_path, dbs_list, databases_config,
                       threshold_id, threshold_cov):
     """Abstract placeholder that concrete search engines have to replace.

     Raises:
         m2pException: always, because SearchEngine itself cannot align.
     """
     reason = "SearchEngine is an abstract class. 'perform_alignment' must be implemented in a child class."
     raise m2pException(reason)