def pos(argv):
    """Print the interpolated 3D position of every leg in a LEG file.

    Command-line entry point for ``dip-c pos``. One line is written to
    standard output per input leg: the tab-delimited position returned by
    ``g3d_data.interpolate_leg``, or the literal ``None`` when no position
    is returned (or, with ``-O``, when the leg is flagged out-of-bound).

    Args:
        argv: argument list; ``argv[0]`` is skipped and options are parsed
            from ``argv[1:]``.

    Returns:
        0 on success, 1 on a usage or argument error.
    """
    # default parameters
    leg_file_name = None
    in_only = False

    # read arguments
    try:
        opts, args = getopt.getopt(argv[1:], "l:O")
    except getopt.GetoptError:
        sys.stderr.write("[E::" + __name__ + "] unknown command\n")
        return 1
    if not args:
        sys.stderr.writelines([
            "Usage: dip-c pos [options] -l <in.leg> <in.3dg>\n",
            "Options:\n",
            " -l <in.leg> LEG file to convert to 3D positions (required)\n",
            " -O exclude out-of-bound legs\n",
        ])
        return 1
    for o, a in opts:
        if o == "-l":
            leg_file_name = a
        elif o == "-O":
            in_only = True
    if leg_file_name is None:
        sys.stderr.write("[E::" + __name__ + "] -l is required\n")
        return 1

    # read 3DG file; the particle count is reported right after loading, so
    # the handle is closed as soon as the structure has been built
    with open(args[0], "rb") as g3d_file:
        g3d_data = file_to_g3d_data(g3d_file)
    g3d_data.sort_g3d_particles()
    g3d_resolution = g3d_data.resolution()
    sys.stderr.write(
        "[M::" + __name__ + "] read a 3D structure with " +
        str(g3d_data.num_g3d_particles()) + " particles at " +
        ("N.A." if g3d_resolution is None else str(g3d_resolution)) +
        " bp resolution\n")
    g3d_data.prepare_interpolate()

    # convert LEG file to 3DG particles
    with open(leg_file_name, "rb") as leg_file:
        for leg_file_line in leg_file:
            is_out, position = g3d_data.interpolate_leg(
                string_to_leg(leg_file_line.strip()))
            if position is None or (is_out and in_only):
                sys.stdout.write("None\n")
            else:
                sys.stdout.write("\t".join(map(str, position)) + "\n")
    return 0
def ard(argv):
    """Examine the contacts found around a set of reference points.

    Command-line entry point for ``dip-c ard``. Two modes are supported:

    * Regular mode (no ``-1``): reference points are the contacts of the
      input CON file itself or of the ``-c`` file, restricted to
      inter-chromosomal ones by default or to intra-chromosomal ones with a
      minimum separation when ``-s`` is given. For every reference point
      the nearby contacts are either printed relative to it, counted
      (``-n``, optionally normalized with ``-t``), or accumulated into a 2D
      histogram (``-h``) that is written at the end.
    * Pairwise leg mode (``-1`` and optionally ``-2``): every pair of legs
      is treated as a reference point and the number of nearby contacts is
      written as a matrix. Pairs that are filtered out keep the value -1.

    Args:
        argv: argument list; ``argv[0]`` is skipped and options are parsed
            from ``argv[1:]``.

    Returns:
        0 on success, 1 on a usage or argument error.
    """
    # default parameters
    reference_file_name = None
    min_separation = None
    max_distance = 10000000
    grid_size = None
    is_symmetrical = True
    superellipse_mode = False
    count_mode = False
    normalize_by_num_cons = False
    leg_file_1_name = None
    leg_file_2_name = None

    # progress display parameters
    display_num_ref_cons = 1000

    # read arguments
    try:
        opts, args = getopt.getopt(argv[1:], "c:s:d:h:Sent1:2:")
    except getopt.GetoptError:
        sys.stderr.write("[E::" + __name__ + "] unknown command\n")
        return 1
    if not args:
        sys.stderr.writelines([
            "Usage: dip-c ard [options] <in.con>\n",
            "Options:\n",
            " -c <ref.con> contact file for reference points [<in.con> itself]\n",
            " -s INT only use intra-chromosomal reference points, min separation (bp) [only use inter-chromosomal] \n",
            " -d INT max distance (bp, L-inf norm) around reference points [" +
            str(max_distance) + "]\n",
            " -h INT output 2D histogram, grid size (bp) (useful for too many contacts)\n",
            " -e use L-1/2 norm (superellipse) instead\n",
            " -S does not symmetrize for \"-h\"\n\n",
            " -n output the number of nearby contacts for each reference point\n",
            " -t normalize by the total number of contacts for \"-n\"\n\n",
            " -1 <in1.leg> generate a pairwise count matrix between reference legs\n",
            # the code falls back to the "-1" legs when "-2" is absent
            " -2 <in2.leg> generate a pairwise count matrix between two sets of reference legs [<in1.leg>]\n",
        ])
        return 1
    for o, a in opts:
        if o == "-c":
            reference_file_name = a
        elif o == "-s":
            min_separation = int(a)
        elif o == "-d":
            max_distance = int(a)
        elif o == "-h":
            grid_size = int(a)
        elif o == "-S":
            is_symmetrical = False
        elif o == "-e":
            superellipse_mode = True
        elif o == "-n":
            count_mode = True
        elif o == "-t":
            normalize_by_num_cons = True
        elif o == "-1":
            leg_file_1_name = a
        elif o == "-2":
            leg_file_2_name = a

    def percent(part, whole):
        # percentage rounded to 2 digits; 0.0 instead of a
        # ZeroDivisionError when the set being described is empty
        if whole == 0:
            return 0.0
        return round(100.0 * part / whole, 2)

    def read_con_data(file_name):
        # load a (possibly gzip-compressed) CON file and close the handle
        con_file = gzip.open(file_name, "rb") if file_name.endswith(
            ".gz") else open(file_name, "rb")
        try:
            return file_to_con_data(con_file)
        finally:
            con_file.close()

    def read_legs(file_name):
        # parse one leg per line of a LEG file
        with open(file_name, "rb") as leg_file:
            return [
                string_to_leg(leg_file_line.strip())
                for leg_file_line in leg_file
            ]

    # read CON file
    con_data = read_con_data(args[0])
    sys.stderr.write(
        "[M::" + __name__ + "] read " + str(con_data.num_cons()) +
        " contacts (" +
        str(percent(con_data.num_intra_chr(), con_data.num_cons())) +
        "% intra-chromosomal, " +
        str(percent(con_data.num_phased_legs(), 2 * con_data.num_cons())) +
        "% legs phased)\n")

    # neighbor query: superellipse with "-e", L-inf norm otherwise
    find_nearby = (con_data.get_cons_near
                   if superellipse_mode else con_data.get_cons_near_inf)

    if leg_file_1_name is None:
        # regular mode
        # read reference CON file
        if reference_file_name is None:
            # use itself
            ref_con_data = copy.deepcopy(con_data)
        else:
            # open another file
            ref_con_data = read_con_data(reference_file_name)
            sys.stderr.write(
                "[M::" + __name__ + "] read " +
                str(ref_con_data.num_cons()) + " reference points (" +
                str(percent(ref_con_data.num_intra_chr(),
                            ref_con_data.num_cons())) +
                "% intra-chromosomal)\n")

        # keep only desired reference points
        if min_separation is None:
            # inter-chromosomal only
            ref_con_data.clean_intra_chr()
        else:
            # intra-chromosomal only, remove small separations
            ref_con_data.clean_inter_chr()
            ref_con_data.clean_separation(min_separation)
        sys.stderr.write(
            "[M::" + __name__ + "] kept " + str(ref_con_data.num_cons()) +
            " reference points (" +
            str(percent(ref_con_data.num_intra_chr(),
                        ref_con_data.num_cons())) +
            "% intra-chromosomal)\n")

        # initialize 2D histogram
        if grid_size is not None:
            # floor division keeps the grid dimension an integer under
            # true division as well
            grid_num = 2 * max_distance // grid_size
            around_hist = np.zeros((grid_num, grid_num), dtype=int)

        # find relative positions
        con_data.sort_cons()
        num_ref_cons = 0
        for ref_con in ref_con_data.get_cons():
            num_ref_cons += 1
            if num_ref_cons % display_num_ref_cons == 0:
                sys.stderr.write("[M::" + __name__ + "] analyzed " +
                                 str(num_ref_cons) + " reference points\n")
            num_nearby_cons = 0
            for con in find_nearby(ref_con, max_distance):
                num_nearby_cons += 1
                if count_mode:
                    continue
                if grid_size is None:
                    # output relative positions
                    sys.stdout.write(con.to_string_around(ref_con) + "\n")
                    continue

                # calculate histogram
                rel_locus = con.to_rel_locus_around(ref_con)
                if not is_symmetrical:
                    sym_loci = [(rel_locus[0], rel_locus[1])]
                elif min_separation is None:
                    # inter-chromosomal: 8 copies
                    sym_loci = []
                    for sign_1 in [-1, 1]:
                        for sign_2 in [-1, 1]:
                            sym_loci.append((sign_1 * rel_locus[0],
                                             sign_2 * rel_locus[1]))
                            sym_loci.append((sign_2 * rel_locus[1],
                                             sign_1 * rel_locus[0]))
                else:
                    # intra-chromosomal: 2 copies
                    sym_loci = [(rel_locus[0], rel_locus[1]),
                                (-1 * rel_locus[1], -1 * rel_locus[0])]
                for sym_locus in sym_loci:
                    add_ref_locus_to_hist(around_hist, sym_locus,
                                          max_distance, grid_size)
            if count_mode:
                if normalize_by_num_cons:
                    sys.stdout.write(
                        str(float(num_nearby_cons) / con_data.num_cons()) +
                        "\n")
                else:
                    sys.stdout.write(str(num_nearby_cons) + "\n")

        # output 2D histogram
        if grid_size is not None:
            sys.stderr.write("[M::" + __name__ +
                             "] writing output for 2D histogram\n")
            np.savetxt(sys.stdout, around_hist, delimiter='\t')
    else:
        # pairwise leg mode
        # NOTE(review): regular mode sorts the contacts before querying
        # neighbors; the same is done here so both modes query identically
        # prepared data -- confirm whether get_cons_near* requires it
        con_data.sort_cons()

        # read legs
        legs_1 = read_legs(leg_file_1_name)
        if leg_file_2_name is None:
            legs_2 = legs_1
        else:
            legs_2 = read_legs(leg_file_2_name)

        # initialize pairwise count matrix; -1 marks pairs never counted
        num_legs_1 = len(legs_1)
        num_legs_2 = len(legs_2)
        count_matrix = np.full((num_legs_1, num_legs_2), -1, dtype=int)

        # for each pair of legs
        num_ref_cons = 0
        for i in range(num_legs_1):
            # a single leg set is symmetric: visit the upper triangle only
            for j in (range(i + 1, num_legs_2)
                      if leg_file_2_name is None else range(num_legs_2)):
                ref_con = Con(legs_1[i], legs_2[j])
                if min_separation is None:
                    # inter-chromosomal only
                    if ref_con.is_intra_chr():
                        continue
                else:
                    # intra-chromosomal only, remove small separations
                    if (not ref_con.is_intra_chr()
                            or ref_con.separation() < min_separation):
                        continue
                num_ref_cons += 1
                if num_ref_cons % display_num_ref_cons == 0:
                    sys.stderr.write("[M::" + __name__ + "] analyzed " +
                                     str(num_ref_cons) +
                                     " reference points\n")

                # count
                num_nearby_cons = sum(
                    1 for _ in find_nearby(ref_con, max_distance))
                count_matrix[i, j] = num_nearby_cons
                if leg_file_2_name is None:
                    count_matrix[j, i] = num_nearby_cons

        # write pairwise count matrix
        sys.stderr.write("[M::" + __name__ +
                         "] writing output for pairwise count matrix\n")
        np.savetxt(sys.stdout, count_matrix, fmt='%i', delimiter='\t')
    return 0
def color(argv):
    """Assign a numeric "color" to the particles of a 3D structure.

    Command-line entry point for ``dip-c color``. Exactly one color scheme
    must be selected (``-c``, ``-n``, ``-l``, ``-L``, ``-h``, ``-i``,
    ``-I``, ``-d``, ``-r``, ``-C`` or ``-D``). The colors can optionally be
    smoothed over a ball (``-s``). By default one tab-delimited line
    (homolog, locus, color) is written per colored particle; with ``-R``
    the colors are instead averaged per radial-distance bin and one line
    (radial distance, average color, number of particles) is written per
    bin.

    Args:
        argv: argument list; ``argv[0]`` is skipped and options are parsed
            from ``argv[1:]``.

    Returns:
        0 on success, 1 on a usage or argument error, or when the ``-D``
        reference leg has no interpolated position.
    """
    # default parameters
    color_file_name = None
    color_mode = None
    max_distance = None
    smooth_distance = None
    max_separation = None
    radial_mode = False
    radial_min_num_particles = 10
    radial_missing_value = -1.0
    radial_max_r = 3.0
    radial_bin_r = 0.05

    # display parameters
    disp_num_particles = 1000

    # read arguments
    # ("-m" used to be accepted here although no color scheme implements
    # it, which ended in an UnboundLocalError; it is now rejected)
    try:
        opts, args = getopt.getopt(
            argv[1:], "c:n:l:L:i:s:S:hd:r:I:CD:R",
            ["min-num=", "missing=", "max-r=", "bin-size="])
    except getopt.GetoptError:
        sys.stderr.write("[E::" + __name__ + "] unknown command\n")
        return 1
    if not args:
        sys.stderr.writelines([
            "Usage: dip-c color [options] <in.3dg>\n",
            "Options:\n",
            " -c <color.txt> color by a list of locus-color pairs (tab-delimited: chr, locus, color)\n",
            " -n <chr.txt> color by chromosome name (one chromosome per line)\n",
            " -l <chr.len> color by locus divided by chromosome length (tab-delimited: chr, len)\n",
            " -L <chr.cen> color by arm locus divided by arm length (tab-delimited: chr, len, center of centromere)\n",
            " -h color by distance to homologous locus\n\n",
            " -i FLOAT color by percentage of intra-homologous neighbors within a given distance\n",
            " -I FLOAT color by number of intra-homologous neighbors within a given distance\n",
            " -S INT (with \"-i\" or \"-I\") max separation (bp) for intra-homologous neighbors\n\n",
            " -d FLOAT color by homolog diversity within a given distance\n",
            " -r FLOAT color by homolog richness within a given distance\n\n",
            " -C color by distance to the nuclear center of mass\n",
            " -D <in.leg> color by distance to a given locus (only the first line of the LEG file will be used)\n\n",
            " -s FLOAT smooth color by averaging over a ball\n\n",
            " -R special: output average color for different radial distances (normalized to 1.0)\n",
            " --min-num=INT (with \"-R\") min number of particles per bin [" +
            str(radial_min_num_particles) + "]\n",
            " --missing=FLOAT (with \"-R\") output value when \"--min-num\" is not met [" +
            str(radial_missing_value) + "]\n",
            " --max-r=FLOAT (with \"-R\") max radial distance [" +
            str(radial_max_r) + "]\n",
            " --bin-size=FLOAT (with \"-R\") bin size of radial distances [" +
            str(radial_bin_r) + "]\n\n",
            "Output:\n",
            " tab-delimited: homolog, locus, color\n",
            " (with \"-R\") tab-delimited: radial distance, average color, #particles\n",
        ])
        return 1

    num_color_schemes = 0
    for o, a in opts:
        if o in ("-i", "-I", "-d", "-r"):
            # neighborhood-based schemes take the radius as their argument
            num_color_schemes += 1
            color_mode = o[1:]
            max_distance = float(a)
        elif o == "-s":
            smooth_distance = float(a)
        elif o == "-S":
            max_separation = int(a)
        elif o == "--min-num":
            radial_min_num_particles = int(a)
        elif o == "--missing":
            radial_missing_value = float(a)
        elif o == "--max-r":
            radial_max_r = float(a)
        elif o == "--bin-size":
            radial_bin_r = float(a)
        elif o == "-R":
            radial_mode = True
        else:
            # remaining schemes: optional argument is an input file name
            num_color_schemes += 1
            color_mode = o[1:]
            if a != "":
                color_file_name = a
    # "-S" is consumed by both the "-i" and the "-I" scheme
    if max_separation is not None and color_mode not in ("i", "I"):
        sys.stderr.write("[E::" + __name__ +
                         "] \"-S\" must be used with \"-i\" or \"-I\"\n")
        return 1
    if num_color_schemes != 1:
        sys.stderr.write("[E::" + __name__ +
                         "] exactly one color scheme is needed\n")
        return 1

    # read 3DG file; the particle count is reported right after loading, so
    # the handle is closed as soon as the structure has been built
    with open(args[0], "rb") as g3d_file:
        g3d_data = file_to_g3d_data(g3d_file)
    g3d_data.sort_g3d_particles()
    g3d_resolution = g3d_data.resolution()
    sys.stderr.write(
        "[M::" + __name__ + "] read a 3D structure with " +
        str(g3d_data.num_g3d_particles()) + " particles at " +
        ("N.A." if g3d_resolution is None else str(g3d_resolution)) +
        " bp resolution\n")

    def report_progress(action, count):
        # periodic progress line on stderr
        if count % disp_num_particles == 0:
            sys.stderr.write(
                "[M::" + __name__ + "] " + action + " " + str(count) +
                " particles (" +
                str(round(100.0 * count / g3d_data.num_g3d_particles(), 2)) +
                "%)\n")

    def distance_to(particle, point):
        # Euclidean distance between a particle and an (x, y, z) point
        return math.sqrt((particle.get_x() - point[0])**2 +
                         (particle.get_y() - point[1])**2 +
                         (particle.get_z() - point[2])**2)

    # open color file (only some schemes come with one)
    color_file = None
    if color_file_name is not None:
        color_file = open(color_file_name, "rb")

    # prepare
    try:
        if color_mode == "c":
            ref_name_ref_locus_colors = {}
            for color_file_line in color_file:
                ref_name, ref_locus, locus_color = (
                    color_file_line.strip().split("\t"))
                ref_name_ref_locus_colors[(ref_name, int(ref_locus))] = (
                    float(locus_color))
        elif color_mode == "n":
            # chromosomes are numbered 1, 2, ... in file order
            ref_name_colors = {}
            for color_counter, color_file_line in enumerate(color_file, 1):
                ref_name_colors[color_file_line.strip()] = color_counter
        elif color_mode == "l":
            ref_lens = {}
            for color_file_line in color_file:
                ref_name, ref_len = color_file_line.strip().split("\t")
                ref_lens[ref_name] = int(ref_len)
        elif color_mode == "L":
            ref_lens = {}
            ref_cens = {}
            for color_file_line in color_file:
                ref_name, ref_len, ref_cen = (
                    color_file_line.strip().split("\t"))
                ref_lens[ref_name] = int(ref_len)
                ref_cens[ref_name] = int(ref_cen)
        elif color_mode in ("i", "I", "d", "r"):
            g3d_data.prepare_nearby()
        elif color_mode == "C":
            hom_names, loci_np_array, position_np_array = (
                g3d_data.to_np_arrays())
            ref_pos = np.mean(position_np_array, axis=0)
            sys.stderr.write("[M::" + __name__ +
                             "] reference point (center of mass) is at (" +
                             ", ".join(map(str, ref_pos)) + ")\n")
        elif color_mode == "D":
            # find reference point position
            ref_leg = string_to_leg(color_file.readline().strip())
            g3d_data.prepare_interpolate()
            is_out, ref_pos = g3d_data.interpolate_leg(ref_leg)
            if ref_pos is None:
                # no position means no distances can be computed
                sys.stderr.write("[E::" + __name__ + "] reference point (" +
                                 ref_leg.to_string() +
                                 ") cannot be interpolated\n")
                return 1
            sys.stderr.write("[M::" + __name__ + "] reference point (" +
                             ref_leg.to_string() + ") is at (" +
                             ", ".join(map(str, ref_pos)) + ")\n")
    finally:
        if color_file is not None:
            color_file.close()

    # calculate colors for each particle; particles without a color for the
    # chosen scheme are skipped
    color_data = {}
    for atom_id, g3d_particle in enumerate(g3d_data.get_g3d_particles(), 1):
        report_progress("analyzed", atom_id)

        # color
        if color_mode == "c":
            try:
                color_value = ref_name_ref_locus_colors[(
                    g3d_particle.get_ref_name(),
                    g3d_particle.get_ref_locus())]
            except KeyError:
                continue
        elif color_mode == "n":
            try:
                color_value = ref_name_colors[g3d_particle.get_ref_name()]
            except KeyError:
                continue
        elif color_mode == "l":
            try:
                color_value = (float(g3d_particle.get_ref_locus()) /
                               ref_lens[g3d_particle.get_ref_name()])
            except KeyError:
                continue
        elif color_mode == "L":
            try:
                ref_cen = ref_cens[g3d_particle.get_ref_name()]
                arm_locus = g3d_particle.get_ref_locus() - ref_cen
                if arm_locus > 0:
                    arm_len = (ref_lens[g3d_particle.get_ref_name()] -
                               ref_cen)
                else:
                    arm_len = ref_cen
            except KeyError:
                continue
            color_value = float(abs(arm_locus)) / arm_len
        elif color_mode == "i":
            color_value = intra_hom_fraction(
                g3d_particle,
                g3d_data.get_g3d_particles_near(g3d_particle.get_position(),
                                                max_distance),
                max_separation)
            if color_value is None:
                continue
        elif color_mode == "I":
            color_value = intra_hom_count(
                g3d_particle,
                g3d_data.get_g3d_particles_near(g3d_particle.get_position(),
                                                max_distance),
                max_separation)
        elif color_mode == "h":
            homologous_g3d_particle = (
                g3d_data.get_g3d_particle_from_hom_name_ref_locus(
                    homologous_hom_name(g3d_particle.get_hom_name()),
                    g3d_particle.get_ref_locus()))
            if homologous_g3d_particle is None:
                continue
            color_value = distance_to(g3d_particle,
                                      (homologous_g3d_particle.get_x(),
                                       homologous_g3d_particle.get_y(),
                                       homologous_g3d_particle.get_z()))
        elif color_mode == "d":
            color_value = hom_diversity(
                g3d_data.get_g3d_particles_near(g3d_particle.get_position(),
                                                max_distance))
        elif color_mode == "r":
            color_value = hom_richness(
                g3d_data.get_g3d_particles_near(g3d_particle.get_position(),
                                                max_distance))
        elif color_mode == "C" or color_mode == "D":
            color_value = distance_to(g3d_particle, ref_pos)
        color_data[g3d_particle.get_hom_name(),
                   g3d_particle.get_ref_locus()] = color_value

    # smoothing
    if smooth_distance is not None:
        g3d_data.prepare_nearby()
        smooth_color_data = {}
        for atom_id, g3d_particle in enumerate(
                g3d_data.get_g3d_particles(), 1):
            report_progress("smoothed", atom_id)
            color_value = smooth_color(
                g3d_particle,
                g3d_data.get_g3d_particles_near(g3d_particle.get_position(),
                                                smooth_distance),
                color_data)
            if color_value is not None:
                smooth_color_data[g3d_particle.get_hom_name(),
                                  g3d_particle.get_ref_locus()] = color_value
        color_data = smooth_color_data

    # radial
    if radial_mode:
        num_radial_bins = int(radial_max_r / radial_bin_r) + 1
        radial_color_sums = [0.0] * num_radial_bins
        radial_color_nums = [0] * num_radial_bins

        # calculate center of mass, and normalization factor
        hom_names, loci_np_array, position_np_array = g3d_data.to_np_arrays()
        ref_pos = np.mean(position_np_array, axis=0)
        mean_radial = np.mean(
            np.sum((position_np_array - ref_pos)**2, axis=-1)**0.5, axis=0)
        sys.stderr.write("[M::" + __name__ +
                         "] radial mode: average radial distance = " +
                         str(mean_radial) +
                         ", which will be normalize to 1.0\n")

        # examine each particle (the counter restarts at 1 so the progress
        # percentage stays within 0-100)
        for atom_id, g3d_particle in enumerate(
                g3d_data.get_g3d_particles(), 1):
            report_progress("radial mode for", atom_id)
            particle_key = (g3d_particle.get_hom_name(),
                            g3d_particle.get_ref_locus())
            if particle_key not in color_data:
                continue
            color_value = color_data[particle_key]
            radial = distance_to(g3d_particle, ref_pos) / mean_radial
            # bins are centered on multiples of the bin size
            radial_bin_id = int(radial / radial_bin_r + 0.5)
            if radial_bin_id >= num_radial_bins:
                continue  # out of bound, skip
            radial_color_sums[radial_bin_id] += color_value
            radial_color_nums[radial_bin_id] += 1

        # output
        sys.stderr.write("[M::" + __name__ +
                         "] writing radial mode output\n")
        for radial_bin_id in range(num_radial_bins):
            if radial_color_nums[radial_bin_id] < radial_min_num_particles:
                output_value = radial_missing_value
            else:
                output_value = (radial_color_sums[radial_bin_id] /
                                radial_color_nums[radial_bin_id])
            sys.stdout.write("\t".join([
                str(radial_bin_id * radial_bin_r),
                str(output_value),
                str(radial_color_nums[radial_bin_id])
            ]) + "\n")
        return 0

    # output
    sys.stderr.write(
        "[M::" + __name__ + "] writing " + str(len(color_data)) +
        " colors (" +
        str(round(100.0 * len(color_data) / g3d_data.num_g3d_particles(),
                  2)) + "%)\n")
    for hom_name, ref_locus in sorted(color_data):
        sys.stdout.write("\t".join([
            hom_name,
            str(ref_locus),
            str(color_data[(hom_name, ref_locus)])
        ]) + "\n")
    return 0
def bincon(argv):
    """Bin contacts (or legs) into a genome-wide matrix.

    Command-line entry point for ``dip-c bincon``. Chromosome lengths from
    the ``-l`` file define consecutive runs of bins, one run per homolog
    (or per chromosome with ``-H``). The positional input is then read as a
    CON file, or as a LEG file with ``-L``, and the resulting matrix is
    written to standard output. With ``-i`` only the bin descriptions
    (name, bin center) are written.

    Args:
        argv: argument list; ``argv[0]`` is skipped and options are parsed
            from ``argv[1:]``.

    Returns:
        0 on success, 1 on a usage or argument error.
    """
    # default parameters
    chr_len_file_name = None
    matrix_bin_size = 1000000
    merge_haplotypes = False
    info_mode = False
    leg_mode = False
    min_separation = 0

    # progress display parameters
    display_num_cons = 1e4

    # read arguments
    try:
        opts, args = getopt.getopt(argv[1:], "l:b:HiLs:")
    except getopt.GetoptError:
        sys.stderr.write("[E::" + __name__ + "] unknown command\n")
        return 1
    if not args:
        sys.stderr.writelines([
            # the positional input is parsed as CON (or LEG with "-L")
            "Usage: dip-c bincon [options] -l <chr.len> <in.con>\n",
            "Options:\n",
            " -l <chr.len> file containing chromosome lengths (tab-delimited: chr, len)\n",
            " -L analyze LEG instead of CON\n",
            " -b INT bin size (bp) (bins are centered around multiples of bin size) [" +
            str(matrix_bin_size) + "]\n",
            " -H merge the two haplotypes\n",
            " -s INT min separation (bp) for intra-chromosomal contacts [" +
            str(min_separation) + "]\n",
            " -i output bin info (tab-delimited: homolog or chr if \"-H\", bin center) instead\n",
        ])
        return 1
    for o, a in opts:
        if o == "-l":
            chr_len_file_name = a
        elif o == "-s":
            min_separation = int(a)
        elif o == "-b":
            matrix_bin_size = int(a)
        elif o == "-H":
            merge_haplotypes = True
        elif o == "-i":
            info_mode = True
        elif o == "-L":
            leg_mode = True
    if chr_len_file_name is None:
        sys.stderr.write("[E::" + __name__ + "] -l is required\n")
        return 1

    def percent(part, whole):
        # percentage rounded to 2 digits; 0.0 instead of a
        # ZeroDivisionError when no contacts are left
        if whole == 0:
            return 0.0
        return round(100.0 * part / whole, 2)

    # read chromosome lengths: each homolog gets a run of bins starting at
    # its offset into the matrix
    haplotypes = ([Haplotypes.paternal] if merge_haplotypes else
                  [Haplotypes.paternal, Haplotypes.maternal])
    hom_offsets = {}
    matrix_size = 0
    with open(chr_len_file_name, "rb") as chr_len_file:
        for chr_len_file_line in chr_len_file:
            ref_name, ref_len = chr_len_file_line.strip().split("\t")
            ref_len = int(ref_len)
            for haplotype in haplotypes:
                hom_name = ref_name_haplotype_to_hom_name(
                    (ref_name, haplotype))
                hom_bin_len = int(
                    round(float(ref_len) / matrix_bin_size)) + 1
                hom_offsets[hom_name] = matrix_size
                matrix_size += hom_bin_len
                if info_mode:
                    for bin_id in range(hom_bin_len):
                        sys.stdout.write("\t".join([
                            (ref_name if merge_haplotypes else hom_name),
                            str(bin_id * matrix_bin_size)
                        ]) + "\n")

    # generate matrix
    if not info_mode:
        if leg_mode:
            # one count per bin
            matrix_data = np.zeros((matrix_size, 1), dtype=int)
            with open(args[0], "rb") as leg_file:
                for leg_file_line in leg_file:
                    leg = string_to_leg(leg_file_line.strip())
                    matrix_data[leg_to_matrix_index(
                        leg, hom_offsets, matrix_bin_size,
                        merge_haplotypes)] += 1
        else:
            con_file = gzip.open(
                args[0], "rb") if args[0].endswith(".gz") else open(
                    args[0], "rb")
            try:
                con_data = file_to_con_data(con_file)
            finally:
                con_file.close()
            con_data.clean_separation(min_separation)
            sys.stderr.write(
                "[M::" + __name__ + "] read " + str(con_data.num_cons()) +
                " putative contacts (" +
                str(percent(con_data.num_intra_chr(),
                            con_data.num_cons())) +
                "% intra-chromosomal, " +
                str(percent(con_data.num_phased_legs(),
                            2 * con_data.num_cons())) +
                "% legs phased)\n")
            matrix_data = con_data_to_matrix(con_data, hom_offsets,
                                             matrix_bin_size, matrix_size,
                                             merge_haplotypes,
                                             display_num_cons)
        np.savetxt(sys.stdout, matrix_data, fmt='%i', delimiter='\t')
    return 0
def _read_leg_positions(g3d_data, leg_file_name):
    """Return an (n, 3) float array with one interpolated position per leg.

    Legs for which ``g3d_data.interpolate_leg`` returns no position become
    rows of NaN. An empty LEG file yields an array of shape (0, 3).
    """
    rows = []
    with open(leg_file_name, "rb") as leg_file:
        for leg_file_line in leg_file:
            _, position = g3d_data.interpolate_leg(
                string_to_leg(leg_file_line.strip()))
            if position is None:
                position = (np.nan, np.nan, np.nan)
            rows.append(position)
    # build the array once instead of re-stacking it for every line
    return np.array(rows, dtype=float).reshape(-1, 3)


def pd(argv):
    """Write the pairwise 3D distance matrix between two sets of legs.

    Command-line entry point for ``dip-c pd``. The legs of the ``-1`` file
    (rows) and of the ``-2`` file (columns; the ``-1`` file again when
    omitted) are interpolated in the given 3D structure and their pairwise
    distances are written tab-delimited to standard output. Legs without a
    position contribute NaN coordinates.

    Args:
        argv: argument list; ``argv[0]`` is skipped and options are parsed
            from ``argv[1:]``.

    Returns:
        0 on success, 1 on a usage or argument error.
    """
    # default parameters
    leg_file_1_name = None
    leg_file_2_name = None

    # read arguments
    try:
        opts, args = getopt.getopt(argv[1:], "1:2:")
    except getopt.GetoptError:
        sys.stderr.write("[E::" + __name__ + "] unknown command\n")
        return 1
    if not args:
        sys.stderr.writelines([
            "Usage: dip-c pd [options] -1 <in1.leg> [-2 <in2.leg>] <in.3dg>\n",
            "Options:\n",
            " -1 <in1.leg> LEG file (required)\n",
            " -2 <in2.leg> LEG file [<in1.leg>]\n",
        ])
        return 1
    for o, a in opts:
        if o == "-1":
            leg_file_1_name = a
        elif o == "-2":
            leg_file_2_name = a
    if leg_file_1_name is None:
        sys.stderr.write("[E::" + __name__ + "] -1 is required\n")
        return 1

    # read 3DG file; the particle count is reported right after loading, so
    # the handle is closed as soon as the structure has been built
    with open(args[0], "rb") as g3d_file:
        g3d_data = file_to_g3d_data(g3d_file)
    g3d_data.sort_g3d_particles()
    g3d_resolution = g3d_data.resolution()
    sys.stderr.write(
        "[M::" + __name__ + "] read a 3D structure with " +
        str(g3d_data.num_g3d_particles()) + " particles at " +
        ("N.A." if g3d_resolution is None else str(g3d_resolution)) +
        " bp resolution\n")
    g3d_data.prepare_interpolate()

    # convert LEG files to 3D positions; without "-2" the first set is
    # reused instead of reading and interpolating the same file twice
    positions_1 = _read_leg_positions(g3d_data, leg_file_1_name)
    if leg_file_2_name is None:
        positions_2 = positions_1
    else:
        positions_2 = _read_leg_positions(g3d_data, leg_file_2_name)

    # calculate pairwise distances
    distances = distance.cdist(positions_1, positions_2)
    np.savetxt(sys.stdout, distances, delimiter='\t')
    return 0