def get_correlated_name(name, correlations):
    '''Return the marker name as given by the correlations.

    The marker is looked up with get_marker_from_correlations. When an entry
    is found, the value stored under its 'name' key is returned; otherwise
    the name is returned exactly as it was received.
    '''
    # look the marker up in the marker correlations
    entry = get_marker_from_correlations(name, correlations)
    if entry is None:
        # nothing known about this marker, keep the given name
        return name
    return entry['name']
def read_markers(fhand, cmap, marker_corr=None):
    '''Load the Monforte marker table into cmap['features'].

    fhand is read as tab-separated text. For every row, column 0 is the
    marker name (lower-cased and stripped), column 1 the marker type
    (stripped) and column 2 the publication reference.

    When get_marker_from_correlations finds an entry for the marker, the
    entry overrides the row: its 'name' and 'publication' are used, and the
    type is its 'type', or its 'sofa' when 'type' is None.

    An empty type is stored as 'unknown'. The features dict is modified in
    place, keyed by the final marker name; nothing is returned.
    '''
    reader = csv.reader(fhand, delimiter='\t')
    features = cmap['features']
    for fields in reader:
        marker_name = fields[0].lower().strip()
        marker_type = fields[1].strip()
        publication = fields[2]

        corrected = get_marker_from_correlations(marker_name, marker_corr)
        if corrected is not None:
            # the correlation entry wins over what the file says
            marker_name = corrected['name']
            marker_type = corrected['type']
            if marker_type is None:
                marker_type = corrected['sofa']
            publication = corrected['publication']

        features[marker_name] = {'name': marker_name,
                                 'type': marker_type or 'unknown',
                                 'publication': publication}
def read_bin_map(fhand, cmap, correlations):
    '''Load the bin map and add its bins to cmap as located features.

    fhand is read as tab-separated text whose first line is skipped. From
    every other row column 1 is the group, column 2 the marker name,
    column 3 the marker type ('bin', 'binint' or 'ref'; any other value is
    ignored) and column 5 the bin. Rows with neither group nor bin are
    skipped.

    Within a group, every 'bin'/'binint' marker is delimited by the 'ref'
    marker seen before it (start) and the next 'ref' marker (end). Bins
    still open when the group changes, or when the input ends, are closed
    with the last 'ref' seen in that group. Positions of the delimiting
    markers are taken from the 'icugi' map set through Cmap. A 'binint'
    bin starts halfway between its two delimiting positions.

    Args:
        fhand: iterable of lines with the tab-separated bin map.
        cmap: dict with 'features' and 'map_sets'; it is modified in place.
        correlations: marker correlations, passed to get_correlated_name
            and get_marker_from_correlations.

    Raises:
        ValueError: if the group of a bin matches no map accession in the
            'icugi' map set.

    NOTE(review): every marker kept from the file is assumed to have a
    correlation entry; when get_marker_from_correlations returns None the
    'sofa' lookup below raises TypeError.
    '''
    map_reader = csv.reader(fhand, delimiter='\t')
    next(map_reader, None)  # ignore first line; tolerate an empty file

    map_data = []
    for row in map_reader:
        group = row[1]
        marker = row[2].lower().strip()
        marker_type = row[3]
        bin_ = row[5]
        if not group and not bin_:
            continue
        marker_name = get_correlated_name(marker, correlations)
        marker_ = get_marker_from_correlations(marker_name, correlations)
        map_data.append({'group': group.lower(),
                         'marker': marker_name,
                         'marker_type': marker_type,
                         'bin': bin_,
                         'feature_type': marker_['sofa']})

    # Pair every bin with the reference markers that delimit it.
    all_bins = []
    open_bins = []  # bins waiting for the reference marker that ends them
    ref = None
    group = None
    for marker in map_data:
        kind = marker['marker_type']
        marker_name = marker['marker']
        if group is None or group != marker['group']:
            # a new group starts: close what is left of the previous one
            _close_bins(open_bins, ref, all_bins)
            open_bins = []
            ref = None
            group = marker['group']
        if kind in ('bin', 'binint'):
            open_bins.append({'start_marker': ref,
                              'end_marker': None,
                              'type': kind,
                              'name': marker_name,
                              'map': group,
                              'feature_type': marker['feature_type']})
        elif kind == 'ref':
            _close_bins(open_bins, marker_name, all_bins)
            open_bins = []
            ref = marker_name
    # The end of the input also ends the last group; without this the bins
    # placed after its last reference marker would be lost.
    _close_bins(open_bins, ref, all_bins)

    cmap_ = Cmap(cmap)
    # now put the positions
    for bin_ in all_bins:
        if bin_['start_marker'] is None:
            bin_['start_position'] = 0
        else:
            bin_['start_position'] = _bin_marker_position(
                cmap_, bin_['start_marker'], bin_['map'], 'start')
        if bin_['end_marker'] is None:
            # no closing reference: the bin collapses onto its start
            bin_['end_position'] = bin_['start_position']
        else:
            bin_['end_position'] = _bin_marker_position(
                cmap_, bin_['end_marker'], bin_['map'], 'end')

    # a binint starts in the middle of the interval
    for bin_ in all_bins:
        if bin_['type'] == 'binint':
            half = (bin_['end_position'] - bin_['start_position']) / 2.0
            bin_['start_position'] = int(round(bin_['start_position'] + half))

    # add to cmap
    for bin_ in all_bins:
        marker_name = bin_['name']
        # the bin group has to be translated into one of the icugi maps
        target_map = _find_icugi_map(cmap, bin_['map'])
        if target_map is None:
            raise ValueError("map %r not found in the 'icugi' map set "
                             "for bin %r" % (bin_['map'], marker_name))
        cmap['features'][marker_name] = {'type': bin_['feature_type'],
                                         'name': marker_name}
        target_map['feature_locations'].append(
            {'feature': marker_name,
             'start': bin_['start_position'],
             'end': bin_['end_position']})


def _close_bins(open_bins, end_marker, closed_bins):
    'It sets the end marker of the open bins and stores them as closed'
    for bin_ in open_bins:
        bin_['end_marker'] = end_marker
    closed_bins.extend(open_bins)


def _bin_marker_position(cmap_, marker, group, role):
    '''It returns the position of a bin delimiting marker in the icugi mapset.

    role is 'start' or 'end' and is only used in the warning printed when
    the lookup raises KeyError; in that case 0 is returned.
    '''
    try:
        return cmap_.feature_location(feature=marker, mapset='icugi',
                                      map_=group)[2]
    except KeyError:
        print("bin %s not found, reference: %s group : %s " %
              (role, marker, group))
        return 0  # this is to be able to work, this is unreal


def _find_icugi_map(cmap, accession):
    'It returns the icugi map with the given accession, None if not found'
    found = None
    for mapset in cmap['map_sets']:
        if mapset['name'] != 'icugi':
            continue
        for map_ in mapset['maps']:
            if map_['accession'] == accession:
                found = map_  # as before, the last match wins
    return found