Example #1
0
    def _create_row_feature(self, feature_mapping, collapsed_view):

        if collapsed_view:
            new_map_position = feature_mapping.clone()
        else:
            map_position = MappingResult.get_empty()

            new_map_position = map_position.clone()
            new_map_position.set_feature(feature_mapping)

        return new_map_position
Example #2
0
    def _create_row_feature(self, feature_mapping, collapsed_view):
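        """Create the row-level feature mapping: a clone of feature_mapping
        when the view is collapsed, otherwise a clone of an empty mapping
        position with the feature attached to it."""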

        if collapsed_view:
            new_map_position = feature_mapping.clone()
        else:
            map_position = MappingResult.get_empty()

            new_map_position = map_position.clone()
            new_map_position.set_feature(feature_mapping)

        return new_map_position
Example #3
0
    def _parse_index_file_by_id(self, query_ids_dict, index_path, data_path,
                                map_config, chrom_dict, multiple_param,
                                dataset_synonyms, test_set):
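        """Retrieve mapping results for the queries in test_set using a
        pickled index (index_path) of byte offsets into the data file
        (data_path). Queries found in the index are flagged in query_ids_dict,
        and results with multiple positions are skipped unless multiple_param
        is set. dataset_synonyms is not used yet (see TODO below)."""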
        mapping_results_list = []

        ## TODO: DO SOMETHING WITH THE SYNONYMS!!!
        ##

        map_name = map_config.get_name()
        map_has_cm_pos = map_config.has_cm_pos()
        map_has_bp_pos = map_config.has_bp_pos()
        map_is_physical = map_config.as_physical()

        sys.stderr.write("MappingsParser: loading index " + str(index_path) +
                         "...\n")

        # the index is pickled, so open it in binary mode
        with open(index_path, 'rb') as index_f:
            index = cPickle.load(index_f)

        sys.stderr.write("MappingsParser: loaded index with " +
                         str(len(index)) + " entries.\n")

        sys.stderr.write("MappingsParser: obtaining index of queries...\n")
        queries_bytes = []
        for query in test_set:
            if query in index:
                query_bytes = index[query]

                query_ids_dict[query] = 1  # found

                queries_bytes.append(query_bytes)

        with open(data_path, 'r') as data_f:
            for query_bytes in queries_bytes:
                data_f.seek(query_bytes)
                mapping_line = data_f.readline()
                mapping_data = mapping_line.strip().split("\t")
                mapping_result = MappingResult.init_from_data(
                    mapping_data, map_name, chrom_dict, map_is_physical,
                    map_has_cm_pos, map_has_bp_pos)

                if mapping_result.has_multiple_pos():
                    if multiple_param == False:
                        continue

                mapping_results_list.append(mapping_result)

        return mapping_results_list
Example #4
0
    def parse_mapping_file(self, data_path, map_config, chrom_dict):
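        """Parse a whole tab-separated mappings file into a list of
        MappingResult objects, skipping ">" and "#" header lines."""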
        mapping_results_list = []

        map_name = map_config.get_name()
        map_has_cm_pos = map_config.has_cm_pos()
        map_has_bp_pos = map_config.has_bp_pos()
        map_is_physical = map_config.as_physical()

        for hit in open(data_path, 'r'):
            if hit.startswith(">") or hit.startswith("#"): continue
            hit_data = hit.strip().split("\t")

            mapping_result = MappingResult.init_from_data(
                hit_data, map_name, chrom_dict, map_is_physical,
                map_has_cm_pos, map_has_bp_pos)
            mapping_results_list.append(mapping_result)

        return mapping_results_list
Example #5
0
    def _createPositions(self, markers_positions, multiple_param, chrom_dict,
                         map_name):
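        """Build MappingResult objects from the positions collected for each
        marker. Markers without positions are skipped, markers with multiple
        positions are discarded unless multiple_param is set, and positions on
        chromosomes missing from chrom_dict are ignored."""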
        positions_list = []

        for marker_id in markers_positions:
            #sys.stderr.write(marker_id+"\n")
            positions = markers_positions[marker_id]["positions"]
            #sys.stderr.write(str(positions)+"\n")
            num_marker_pos = len(positions)

            if num_marker_pos == 0: continue  # contigs without position
            if (not multiple_param) and (num_marker_pos > 1):
                if self._verbose:
                    sys.stderr.write(
                        "Mappers: discarded multiple pos marker: " +
                        str(marker_id) + "\n")
                continue  # Multiple positions

            num_contig_no_pos = len(
                markers_positions[marker_id]["hits_no_position"])

            for pos in positions:
                # marker - chr - cm_pos - bp_pos - multiple - has_contigs_with_no_pos - map_name
                chr_pos = pos["chr"]
                # If the chromosome is not in the genome, skip this alignment result
                if chr_pos not in chrom_dict: continue

                # Numeric value of the chromosome (for sorting purposes)
                chrom_order = chrom_dict[chr_pos]

                mapping_result = MappingResult(
                    marker_id, chr_pos, chrom_order, pos["cm_pos"],
                    pos["cm_end_pos"], pos["bp_pos"], pos["bp_end_pos"],
                    pos["strand"], num_marker_pos > 1, num_contig_no_pos > 0,
                    map_name)
                positions_list.append(mapping_result)

        return positions_list
Example #6
0
    def _parse_mapping_file_by_id(self,
                                  query_ids_dict,
                                  data_path,
                                  map_config,
                                  chrom_dict,
                                  multiple_param,
                                  dataset_synonyms={},
                                  test_set=None):
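        """Parse the mappings file keeping only hits whose query, or one of
        its synonyms in dataset_synonyms, belongs to test_set; found queries
        are flagged in query_ids_dict. When test_set is empty or None, every
        hit is parsed. Results with multiple positions are kept only if
        multiple_param is set, and parsing stops early once test_set has been
        emptied."""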
        mapping_results_list = []

        map_name = map_config.get_name()
        map_has_cm_pos = map_config.has_cm_pos()
        map_has_bp_pos = map_config.has_bp_pos()
        map_is_physical = map_config.as_physical()

        for hit in open(data_path, 'r'):
            #sys.stderr.write(" ONE**************************\n")
            #sys.stderr.write(str(hit)+"\n")
            if hit.startswith(">") or hit.startswith("#"): continue
            hit_data = hit.strip().split("\t")

            #sys.stderr.write("data\n")

            if test_set:

                hit_query = hit_data[0]

                #if hit_query == "12_30924":
                #    sys.stderr.write(str(test_set)+"\n")

                #sys.stderr.write("CHECK TESTSET\n")
                if hit_query in dataset_synonyms:
                    #if hit_query == "12_30924":
                    #    sys.stderr.write("IS IN SYNONYMS\n")
                    hit_synonyms = dataset_synonyms[hit_query]
                    synonyms_found = test_set.intersection(hit_synonyms)

                    if len(synonyms_found) > 0:
                        #if hit_query == "12_30924":
                        #    sys.stderr.write("-".join(synonyms_found)+"\n")
                        mapping_result = MappingResult.init_from_data(
                            hit_data, map_name, chrom_dict, map_is_physical,
                            map_has_cm_pos, map_has_bp_pos)

                        for synonym in synonyms_found:  # all found
                            query_ids_dict[synonym] = 1

                        if mapping_result.has_multiple_pos():
                            if multiple_param == False:
                                continue
                        else:  # just for the sake of readability
                            for synonym in synonyms_found:  # all found
                                if synonym in test_set:
                                    test_set.remove(synonym)

                        mapping_result.set_marker_id("|".join(synonyms_found))
                        mapping_results_list.append(mapping_result)
                else:
                    #if hit_query == "12_30924":
                    #    sys.stderr.write("IS NOT IN SYNONYMS\n")
                    if hit_query in test_set:
                        #sys.stderr.write("create mapping data\n")
                        mapping_result = MappingResult.init_from_data(
                            hit_data, map_name, chrom_dict, map_is_physical,
                            map_has_cm_pos, map_has_bp_pos)

                        query_ids_dict[hit_query] = 1  # found

                        #sys.stderr.write("append\n")
                        if mapping_result.has_multiple_pos():
                            if multiple_param == False:
                                continue
                        else:  # just for the sake of readability
                            test_set.remove(hit_query)

                        mapping_results_list.append(mapping_result)

            else:  # retrieve all mapping results
                mapping_result = MappingResult.init_from_data(
                    hit_data, map_name, chrom_dict, map_is_physical,
                    map_has_cm_pos, map_has_bp_pos)

                query_ids_dict[hit_data[0]] = 1  # found

                if mapping_result.has_multiple_pos():
                    if multiple_param == False:
                        continue

                mapping_results_list.append(mapping_result)

            #sys.stderr.write("**********NEXT\n")
            if test_set is not None and len(test_set) == 0: break

        return mapping_results_list
Example #7
0
    def parse_mapping_file_on_pos(self, map_intervals, data_path, chrom_dict,
                                  map_config, map_sort_by, dataset,
                                  dataset_name, feature_type):
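        """Assign the hits of data_path to the map_intervals they overlap:
        each overlapping hit is turned into a feature (via FeaturesFactory)
        and appended to every interval it overlaps. The single-pass scan
        assumes hits and intervals are sorted by position."""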

        map_name = map_config.get_name()
        map_is_physical = map_config.as_physical()
        map_has_cm_pos = map_config.has_cm_pos()
        map_has_bp_pos = map_config.has_bp_pos()

        current_interval_pos = 0
        featured_current_interval = map_intervals[current_interval_pos]
        current_interval = featured_current_interval.get_map_interval()
        current_features = featured_current_interval.get_features()

        #sys.stderr.write("MappingsParser \n")
        #sys.stderr.write("\t"+str(current_interval)+"\n")
        #sys.stderr.write("\t"+str(len(current_features))+"\n")
        #for feature in current_features:
        #    sys.stderr.write("\t\t"+str(feature)+"\n")

        # Find all the hits for this map
        for hit in open(data_path, 'r'):
            if hit.startswith(">") or hit.startswith("#"): continue
            hit_data = hit.strip().split("\t")

            #sys.stderr.write(hit+"\n")
            #sys.stderr.write("\t"+str(current_interval)+"\n")

            mapping_result = MappingResult.init_from_data(
                hit_data, map_name, chrom_dict, map_is_physical,
                map_has_cm_pos, map_has_bp_pos)

            chrom_name = mapping_result.get_chrom_name()

            if chrom_name != current_interval.get_chrom(): continue

            map_end_pos = mapping_result.get_sort_end_pos(map_sort_by)

            if float(map_end_pos) < float(current_interval.get_ini_pos()):
                continue

            marker_id = mapping_result.get_marker_id()
            chrom_order = mapping_result.get_chrom_order()
            map_pos = mapping_result.get_sort_pos(map_sort_by)

            dataset_interval = MapInterval(chrom_name, map_pos, map_end_pos)

            does_overlap = MapInterval.intervals_overlap(
                dataset_interval, current_interval)
            # This if-else may be unnecessary, but it makes the code easier to follow
            if does_overlap:
                next_interval_pos = current_interval_pos
                next_interval = current_interval
                next_features = current_features
                while (does_overlap):
                    feature = FeaturesFactory.get_feature(
                        marker_id, dataset, dataset_name, feature_type,
                        mapping_result)
                    next_features.append(feature)

                    next_interval_pos += 1
                    if next_interval_pos >= len(map_intervals):
                        break
                    featured_next_interval = map_intervals[next_interval_pos]
                    next_interval = featured_next_interval.get_map_interval()
                    next_features = featured_next_interval.get_features()

                    does_overlap = MapInterval.intervals_overlap(
                        dataset_interval, next_interval)

            else:
                while (float(map_pos) > float(current_interval.get_end_pos())):
                    current_interval_pos += 1
                    if current_interval_pos >= len(map_intervals):
                        break
                    featured_current_interval = map_intervals[current_interval_pos]
                    current_interval = featured_current_interval.get_map_interval()
                    current_features = featured_current_interval.get_features()

        #sys.stderr.write("MappingsParser generated intervals\n")
        #for featured_map_interval in map_intervals:
        #    map_interval = featured_map_interval.get_map_interval()
        #    sys.stderr.write("\tinterval: "+str(map_interval)+"\n")
        #    features = featured_map_interval.get_features()
        #    for feature in features:
        #        sys.stderr.write("\t\tfeature: "+str(feature)+"\n")

        return map_intervals
Example #8
0
    def parse_mapping_file_by_pos(self, map_intervals, data_path, chrom_dict,
                                  map_config, map_sort_by):
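        """Return the mapping results of data_path that overlap any of the
        pre-sorted map_intervals, advancing through hits and intervals in a
        single pass; map_sort_by selects which position field is used for the
        comparisons."""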
        mapping_results_list = []

        map_name = map_config.get_name()
        map_is_physical = map_config.as_physical()
        map_has_cm_pos = map_config.has_cm_pos()
        map_has_bp_pos = map_config.has_bp_pos()

        current_interval_pos = 0
        current_interval = map_intervals[current_interval_pos]

        # Find all the hits for this map within intervals of interest
        # Note: hits/map features are pre-computed & sorted along chroms, chroms are in chrom_dict order
        # Note: intervals are pre-sorted as well
        for hit in open(data_path, 'r'):
            if hit.startswith(">") or hit.startswith("#"): continue
            hit_data = hit.strip().split("\t")

            #sys.stderr.write(hit+"\n")
            #sys.stderr.write("\t"+str(current_interval)+"\n")

            mapping_result = MappingResult.init_from_data(
                hit_data, map_name, chrom_dict, map_is_physical,
                map_has_cm_pos, map_has_bp_pos)

            chrom_name = mapping_result.get_chrom_name()

            # move to next interval to match chrom (only if previous mappings exist)
            # Note: needed when last mapping matched exactly the last gene of a chrom
            while len(mapping_results_list) > 0 and \
                current_interval_pos < len(map_intervals)-1 and \
                int(chrom_dict[chrom_name]) > int(chrom_dict[current_interval.get_chrom()]):
                current_interval_pos += 1
                current_interval = map_intervals[current_interval_pos]

            if chrom_name != current_interval.get_chrom(): continue

            map_end_pos = mapping_result.get_sort_end_pos(map_sort_by)

            if float(map_end_pos) < float(current_interval.get_ini_pos()):
                continue

            marker_id = mapping_result.get_marker_id()
            chrom_order = mapping_result.get_chrom_order()
            map_pos = mapping_result.get_sort_pos(map_sort_by)

            while (float(map_pos) > float(current_interval.get_end_pos())):
                current_interval_pos += 1
                if current_interval_pos >= len(map_intervals):
                    break
                current_interval = map_intervals[current_interval_pos]
                if current_interval.get_chrom() != chrom_name:
                    break

            if current_interval_pos >= len(map_intervals): break

            if chrom_name != current_interval.get_chrom(): continue

            if float(map_end_pos) < float(current_interval.get_ini_pos()):
                continue

            dataset_interval = MapInterval(chrom_name, map_pos, map_end_pos)

            #sys.stderr.write("MappingsParser: by_pos "+str(dataset_interval)+" - "+str(current_interval)+"\n")

            does_overlap = MapInterval.intervals_overlap(
                dataset_interval, current_interval)

            # Check if alignment overlaps with some mapping interval
            if does_overlap:
                mapping_results_list.append(mapping_result)

        return mapping_results_list
Example #9
0
    def get_empty():
        return GeneMapping("-", "-", "-", "-", MappingResult.get_empty(),
                           FeatureMapping.ROW_TYPE_MAPPING_RESULT,
                           empty=True, annots=[])
Example #10
0
    def get_empty():
        return MarkerMapping("-", "-", "-", "-", MappingResult.get_empty(),
                             FeatureMapping.ROW_TYPE_MAPPING_RESULT,
                             empty=True)
Example #11
0
    def parse_mapping_file_by_pos(self, map_intervals, data_path, chrom_dict,
                                  map_config, map_sort_by):
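        """Return the mapping results of data_path that overlap any of the
        map_intervals, scanning hits and intervals in a single pass;
        map_sort_by selects which position field is used for the
        comparisons."""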
        mapping_results_list = []

        map_name = map_config.get_name()
        map_is_physical = map_config.as_physical()
        map_has_cm_pos = map_config.has_cm_pos()
        map_has_bp_pos = map_config.has_bp_pos()

        current_interval_pos = 0
        current_interval = map_intervals[current_interval_pos]

        # Find all the hits for this map
        for hit in open(data_path, 'r'):
            if hit.startswith(">") or hit.startswith("#"): continue
            hit_data = hit.strip().split("\t")

            #sys.stderr.write(hit+"\n")
            #sys.stderr.write("\t"+str(current_interval)+"\n")

            mapping_result = MappingResult.init_from_data(
                hit_data, map_name, chrom_dict, map_is_physical,
                map_has_cm_pos, map_has_bp_pos)

            chrom_name = mapping_result.get_chrom_name()

            if chrom_name != current_interval.get_chrom(): continue

            map_end_pos = mapping_result.get_sort_end_pos(map_sort_by)

            if float(map_end_pos) < float(current_interval.get_ini_pos()):
                continue

            marker_id = mapping_result.get_marker_id()
            chrom_order = mapping_result.get_chrom_order()
            map_pos = mapping_result.get_sort_pos(map_sort_by)

            while (float(map_pos) > float(current_interval.get_end_pos())):
                current_interval_pos += 1
                if current_interval_pos >= len(map_intervals):
                    break
                current_interval = map_intervals[current_interval_pos]
                if current_interval.get_chrom() != chrom_name:
                    break

            if current_interval_pos >= len(map_intervals): break

            if chrom_name != current_interval.get_chrom(): continue

            if float(map_end_pos) < float(current_interval.get_ini_pos()):
                continue

            dataset_interval = MapInterval(chrom_name, map_pos, map_end_pos)

            #sys.stderr.write("MappingsParser: by_pos "+str(dataset_interval)+" - "+str(current_interval)+"\n")

            does_overlap = MapInterval.intervals_overlap(
                dataset_interval, current_interval)

            # Check if alignment overlaps with some mapping interval
            if does_overlap:
                mapping_results_list.append(mapping_result)

        return mapping_results_list