def test_only_one_time_point(self):
    """
    Testing if files with just one coordinate for time are read correctly.

    Loads the electrophysiology mapping file, checks the set of mapped field
    names, then builds an IntData object from ``electroTest_2f.txt`` and
    checks its ``min`` and ``max`` attributes.

    Side effects: rebinds the module globals ``int_data_electro`` and
    ``mappings_electro``.
    """
    # print >> stderr,"====================== test 9"
    global int_data_electro, mappings_electro

    msg_mappings = "Equivalences set in electrophysiology mapping file are not correct."

    mappings_electro = mapping.MappingInfo(PATH + "/electrophysiology/e2p.txt")

    expected_fields = ['track', 'data_types', 'data_value', 'start']

    # Compare as sorted lists: the test is about WHICH names are mapped, and
    # the iteration order of ``values()`` is not something it should depend on
    # (presumably ``correspondence`` is a plain dict -- TODO confirm). On
    # Python 3 ``values()`` is a view and would never equal a list.
    fields = sorted(mappings_electro.correspondence.values())
    self.assertEqual(sorted(expected_fields), fields, msg_mappings)

    # Renamed from ``min``/``max`` so the builtins are not shadowed.
    expected_min = 0
    expected_max = 0.3

    msg_int_data_min = "Min value in electrophysiology data intData not correctly read."
    msg_int_data_max = "Max value in electrophysiology data intData not correctly read."

    int_data_electro = intervals.IntData(
        PATH + "/electrophysiology/electroTest_2f.txt",
        map_dict=mappings_electro.correspondence)

    self.assertEqual(int_data_electro.min, expected_min, msg_int_data_min)
    self.assertEqual(int_data_electro.max, expected_max, msg_int_data_max)
def test_02_read_int_data(self):
    """
    Testing the creation of intData object using tutorial data.

    Builds an IntData object from the tutorial feeding file, checks its
    ``min`` and ``max`` attributes and stores the result of ``read`` in the
    module global ``data_read``.
    """
    # print >> stderr,"====================== test 2"
    global data_read

    tutorial_file = PATH + "/feeding/feeding_behavior_HF_mice.csv"

    # Boundary values expected for the tutorial file
    expected_min = 1335985200
    expected_max = 1337766069

    msg_int_data_min = "Min value does not correspond to tutorial files."
    msg_int_data_max = "Max value does not correspond to tutorial files."

    # ``mappings_tutorial`` is a module-level name set outside this method.
    int_data_tutorial = intervals.IntData(
        tutorial_file, map_dict=mappings_tutorial.correspondence)

    self.assertEqual(int_data_tutorial.min, expected_min, msg_int_data_min)
    self.assertEqual(int_data_tutorial.max, expected_max, msg_int_data_max)

    # NOTE(review): relative_coord is the *string* 'False', which is truthy.
    # If ``read`` only tests truthiness this enables relative coordinates --
    # confirm the intent before changing it, since ``data_read`` is global.
    data_read = int_data_tutorial.read(relative_coord='False',
                                       intervals=False,
                                       multiply_t=1)
def pergola_rules(path, map_file_path, sel_tracks=None, list=None, range=None, track_actions=None,
                  data_types_actions=None, data_types_list=None, write_format=None, relative_coord=False,
                  intervals_gen=False, multiply_f=None, no_header=False, fields2read=None, window_size=None,
                  no_track_line=False, separator=None, bed_lab_sw=False, color_dict=None, window_mean=False,
                  value_mean=False, min_t=None, max_t=None, interval_step=None):
    """
    Read an input file through a mapping file and write the converted tracks.

    The function logs every option to stderr, builds an ``intervals.IntData``
    from ``path``, reads it, writes the auxiliary files produced by
    ``mapping.write_chr``, ``mapping.write_chr_sizes`` and
    ``mapping.write_cytoband``, saves the whole data as "all_intervals" and
    finally saves every track returned by ``convert``.

    :param path: input data file, handed to ``intervals.IntData``
    :param map_file_path: mapping file, handed to ``mapping.MappingInfo``
    :param sel_tracks: passed to ``convert`` as ``tracks``
    :param list: tracks to join; incompatible with ``range``
    :param range: tracks to join; incompatible with ``list``
    :param track_actions: when set, the tracks to merge are obtained from
        ``parsers.read_track_actions``; incompatible with ``list``/``range``
    :param data_types_actions: passed to ``convert`` as ``data_types_actions``
    :param data_types_list: passed to ``convert`` as ``data_types``
    :param write_format: ``convert`` mode, 'bed' when not set
    :param relative_coord: passed to ``read``; when true the reported time
        span runs from 0 to ``max - min``
    :param intervals_gen: passed to ``read`` as ``intervals``
    :param multiply_f: passed to ``read`` as ``multiply_t`` and used to scale
        the time limits, 1 when not set
    :param no_header: when true the input file is read with ``header=False``
    :param fields2read: passed to ``intervals.IntData`` as ``fields_names``
    :param window_size: passed to ``convert`` as ``window``, False when not set
    :param no_track_line: when true tracks are written with ``track_line=False``
    :param separator: input field delimiter, tab when not set
    :param bed_lab_sw: when true tracks are saved with ``bed_label=True``
    :param color_dict: when set, read with ``parsers.read_colors`` and passed
        to ``convert`` as ``color_restrictions``
    :param window_mean: passed to ``convert`` as ``mean_win``
    :param value_mean: passed to ``convert`` as ``mean_value``
    :param min_t: lower time limit; 0 is a valid value
    :param max_t: upper time limit; 0 is a valid value
    :param interval_step: passed to ``read`` as ``int_step``; requires
        ``intervals_gen``

    :raises ValueError: if both ``list`` and ``range`` are set, if either is
        combined with ``track_actions``, or if ``interval_step`` is given
        without ``intervals_gen``
    """
    print >> stderr, "@@@Pergola_rules.py: Input file: %s" % path
    print >> stderr, "@@@Pergola_rules.py: Configuration file: %s" % map_file_path

    # Tracks selected by user
    print >> stderr, "@@@Pergola_rules.py: Selected tracks are: ", sel_tracks

    # Configuration file
    map_file_dict = mapping.MappingInfo(map_file_path)

    # Reading color dictionary to set data_types
    if color_dict:
        print >> stderr, "@@@Pergola_rules.py: Color for data_types in file............ %s" % color_dict
        d_colors_data_types = parsers.read_colors (color_dict)
    else:
        d_colors_data_types = None

    # Handling list or range of tracks to join if set
    if list and range:
        raise ValueError("@@@Pergola_rules.py: Argument -l/--list and -r/--range are not compatible. " \
                         "As both arguments set tracks to be joined.")
    elif (list):
        tracks2merge = list
    elif (range):
        tracks2merge = range
    else:
        tracks2merge = ""

    if tracks2merge:
        print >> stderr, "@@@Pergola_rules.py: Tracks to join are............ ", tracks2merge

    # Handling argument track actions
    if tracks2merge and track_actions:
        raise ValueError ("Options --list -l or --range -r are incompatible with " \
                          "--track_actions -a, please change your options")

    track_act = track_actions
    print >> stderr, "@@@Pergola_rules.py: Track actions are.............................. ", track_act

    print >> stderr, "@@@Pergola_rules.py: data_types list is: ", data_types_list

    # Handling argument data_types actions
    data_types_act = data_types_actions
    print >> stderr, "@@@Pergola_rules.py: data_types actions are......................... ", data_types_act

    # Handling argument format
    if write_format:
        print >> stderr, "@@@Pergola_rules.py format to write files....................... ", write_format
    else:
        write_format = 'bed'
        print >>stderr, "@@@Pergola_rules.py format to write files has been set" \
                        " to default value:", write_format

    # Handling relative coordinates
    print >> stderr, "@@@Pergola_rules.py: Relative coordinates set to................. %s" % relative_coord

    # Handling intervals_gen
    print >> stderr, "@@@Pergola_rules.py: Intervals parameter set to.................. %s" % intervals_gen

    # Handling interval_step
    if interval_step:
        if intervals_gen:
            print >> stderr, "@@@Pergola_rules.py: Interval step set to........................ %s" % interval_step
        else:
            raise ValueError("Interval step needs intervals paramater to be set -n/--intervals_gen")

    # Handling multiply_intervals
    if multiply_f:
        print >>stderr, "@@@Pergola_rules.py: Multiply intervals parameter set to...... %s" % multiply_f
    else:
        multiply_f = 1

    print >> stderr, "@@@Pergola_rules.py: Selected tracks are......................... ", sel_tracks

    # Setting whether input file has header or not
    header_sw = True
    if no_header:
        header_sw = False

    # NOTE(review): reconstructed from whitespace-collapsed source; confirm
    # whether this log line belongs inside the ``if no_header`` branch.
    print >> stderr, "@@@Pergola_rules.py: Data file has header set to............. ", header_sw

    # Handling fields to read
    if fields2read:
        print >>stderr, "@@@Pergola_rules.py: Fields to read from the file are......... %s" % fields2read
    else:
        fields2read = None

    # When binning data setting the window of time used in seconds
    # if not size provided set to False
    if window_size:
        print >>stderr, "@@@Pergola_rules.py: Window size set to....................... %d" % window_size
    else:
        # window_size = 300
        window_size = False
        print >>stderr, "@@@Pergola_rules.py: Window size set by default to............ %d" % window_size

    if window_mean:
        print >>stderr, "@@@Pergola_rules.py: Window mean set to....................... %d" % window_mean
    else:
        window_mean = False

    if value_mean:
        print >> stderr, "@@@Pergola_rules.py: Value mean set to....................... %d" % value_mean
    else:
        value_mean = False

    if no_track_line:
        track_line=False
    else:
        track_line=True

    # NOTE(review): reconstructed from whitespace-collapsed source; confirm
    # this log line is not meant to be part of the ``else`` branch above.
    print >>stderr, "@@@Pergola_rules.py: track_line set to............................ %s" % track_line

    # Handling input file field delimiter
    if not separator:
        separator = "\t"
        print >> stderr, "@@@Pergola_rules.py input file field separator set by default to...... \"\\t\"."
    else:
        print >>stderr, "@@@Pergola_rules.py input file field separator set to..... \"%s\"" % separator

    if bed_lab_sw:
        bed_lab = True
        print >>stderr, "@@@Pergola_rules.py: bed_label set to......................... %s" % bed_lab
    else:
        bed_lab = False

    intData = intervals.IntData(path, map_dict=map_file_dict.correspondence,
                                fields_names=fields2read,
                                header=header_sw,
                                delimiter=separator)

    start = intData.min
    end = intData.max

    if relative_coord:
        start = 0
        end = intData.max - intData.min

    print >> stderr, "@@@Pergola_rules.py: min time in file......................... %d" % start
    print >> stderr, "@@@Pergola_rules.py: max time in file......................... %d" % end

    if min_t or min_t == 0:
        min_time = min_t
        print >> stderr, "@@@Pergola_rules.py: min_time set by user to.............. %d" % min_t
    else:
        min_time = start

    # Mirror the min_t check: an explicit max_t of 0 is a user choice and must
    # not silently fall back to the end of the file.
    if max_t or max_t == 0:
        max_time = max_t
        print >> stderr, "@@@Pergola_rules.py: max_time set by user to............... %d" % max_t
    else:
        if interval_step:
            max_time = end + interval_step
        else:
            max_time = end + 1

    # multiply_f is always truthy here (it was defaulted to 1 above), so the
    # limits can be scaled unconditionally.
    min_time = min_time * multiply_f
    max_time = max_time * multiply_f

    if track_act:
        tracks2merge = parsers.read_track_actions(tracks=intData.tracks, track_action=track_act)

    data_read = intData.read(relative_coord=relative_coord, intervals=intervals_gen,
                             multiply_t=multiply_f, min_time=min_time, max_time=max_time,
                             int_step=interval_step)

    mapping.write_chr(data_read)  # maintain
    mapping.write_chr_sizes(data_read)

    # writes cytoband and light, dark and light_dark bed files
    mapping.write_cytoband(end=end, track_line=track_line, lab_bed=False)

    # mapping.write_period_seq(start=0, end=intData.max, delta=43200, name_file="phases_dark", track_line=False)

    data_read.save_track(name_file="all_intervals")

    bed_str = data_read.convert(mode=write_format, tracks=sel_tracks,
                                tracks_merge=tracks2merge,
                                data_types=data_types_list,
                                data_types_actions=data_types_act,
                                window=window_size,
                                mean_win=window_mean,
                                mean_value=value_mean,
                                color_restrictions=d_colors_data_types)
                                #min_t_trim=min_t, max_t_trim=max_t)

    # ``convert`` returns a mapping of tracks; each one is written out.
    for key in bed_str:
        bedSingle = bed_str[key]
        bedSingle.save_track(track_line=track_line, bed_label=bed_lab)
### Drinking data elif args.behavioral_type == 'drinking': data_type_1 = "water" data_type_2 = "saccharin" data_type_col = {data_type_1: 'blue', data_type_2: 'red'} else: print >> stderr, "Behavioral data type not available in script, please try again with \"drinking\" or \"feeding\"" mapping_data = mapping.MappingInfo(args.mapping_file) behavior_mice = dict() end_time = -10000 data_read_all_batches = None for f in args.file_mice_behavior: int_data = intervals.IntData(f, map_dict=mapping_data.correspondence) data_read = int_data.read(relative_coord=True) chr_file_n = "chrom" mapping.write_chr_sizes(data_read, file_n=chr_file_n) chr_file = chr_file_n + ".sizes" if end_time < int_data.max - int_data.min: end_time = int_data.max - int_data.min if not data_read_all_batches: data_read_all_batches = data_read else: data_read_all_batches = tracks.merge_tracks(data_read_all_batches, data_read)
data_type_1 = "food_sc" data_type_2 = "food_fat" data_type_col = {data_type_1: 'orange', data_type_2: 'black'} ### Drinking data elif args.behavioral_type == 'drinking': data_type_1 = "water" data_type_2 = "saccharin" data_type_col = {data_type_1: 'blue', data_type_2: 'red'} else: print >> stderr, "Behavioral data type not available in script, please try again with \"drinking\" or \"feeding\"" mapping_data = mapping.MappingInfo( "../../sample_data/feeding_behavior/b2g.txt") int_data_b1 = intervals.IntData( "../../sample_data/feeding_beh_CB1_mice/intake_CB1_B1.csv", map_dict=mapping_data.correspondence) int_data_b2 = intervals.IntData( "../../sample_data/feeding_beh_CB1_mice/intake_CB1_B2.csv", map_dict=mapping_data.correspondence) int_data_b3 = intervals.IntData( "../../sample_data/feeding_beh_CB1_mice/intake_CB1_B3.csv", map_dict=mapping_data.correspondence) int_data_b4 = intervals.IntData( "../../sample_data/feeding_beh_CB1_mice/intake_CB1_B4.csv", map_dict=mapping_data.correspondence) mapping_bed = mapping.MappingInfo("../../test/pybed2perg.txt") # base_dir = path.dirname(getcwd()) base_dir = getcwd()
# Mapping between the bed file fields and the pergola ontology, taken from
# the command line arguments.
mapping_bed = mapping.MappingInfo(args.bed_mapping)

###################
# Speed bed file, read from the input args
bed_speed_file = args.speed

int_data_speed = intervals.IntData(
    bed_speed_file,
    map_dict=mapping_bed.correspondence,
    header=False,
    fields_names=['chrm', 'start', 'end', 'nature', 'value', 'strain', 'color'],
)

speed_data_read = int_data_speed.read(relative_coord=False)
# speed_data_read.data

###################
# Generate to BedTool objects containing motion type (forward, backward, paused)
motion_bed_file = args.motion

# The motion file has no header and uses the same column layout as the
# speed file.
int_data_motion = intervals.IntData(
    motion_bed_file,
    map_dict=mapping_bed.correspondence,
    header=False,
    fields_names=['chrm', 'start', 'end', 'nature', 'value', 'strain', 'color'],
)
###                                                          ###
###                                                          ###
###                                                          ###
###                                                          ###
###                                                          ###
################################################################

from pergola import mapping, intervals, tracks
from os import path, getcwd

# Paths are resolved relative to the parent of the current working directory.
base_dir = path.dirname(getcwd())
out_dir = base_dir + "/test/"

mapping_data = mapping.MappingInfo(
    base_dir + "/sample_data/feeding_behavior/b2g.txt")

int_data = intervals.IntData(
    base_dir + "/sample_data/feeding_behavior/feedingBehavior_HF_mice.csv",
    map_dict=mapping_data.correspondence)

# Dictionary to set colors of each type of food
data_type_col = {'food_sc': 'orange', 'food_fat':'blue'}

data_read = int_data.read(relative_coord=True)

# for i in data_read.data: print i

# Bare attribute reads: they have no effect in a script run, presumably kept
# from interactive exploration.
data_read.data_types
data_read.fields

# bed_str = data_read.convert(mode="bed")
# bedGraph_str = data_read.convert(mode="bedGraph")

# Convert both food data types to GFF using the colors defined above.
gff_str = data_read.convert(
    mode="gff",
    data_types=["food_sc", "food_fat"],
    data_types_actions="all",
    color_restrictions=data_type_col)
import pybedtools

# import sys
# my_path_to_modules = "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/"
# sys.path.append(my_path_to_modules)

from pergola import mapping
from pergola import intervals

# NOTE(review): both input paths are absolute paths on one developer's
# machine; the script only runs elsewhere after adjusting them.
mapping_data = mapping.MappingInfo(
    "/Users/jespinosa/git/pergola/sample_data/feeding_behavior/b2g.txt")

int_data = intervals.IntData(
    "/Users/jespinosa/git/pergola/sample_data/feeding_behavior/feedingBehavior_HF_mice.csv",
    map_dict=mapping_data.correspondence)

data_read = int_data.read(relative_coord=True)

# Color assigned to each type of food in the generated bed tracks
data_type_col = {'food_sc': 'orange', 'food_fat': 'blue'}

# The keyword is ``data_types_actions`` (it was misspelled
# ``dataTypes_actions`` here), matching the other ``convert`` calls in this
# code base.
bed_str = data_read.convert(mode="bed",
                            data_types=["food_sc", "food_fat"],
                            data_types_actions="all",
                            color_restrictions=data_type_col)

# bed_tr1_food_sc = bed_str[('1', 'food_sc')]
# bedTools_tr1 = bed_tr1_food_sc.create_pybedtools()
# Remaining command line arguments
backward_file = args.backward_file
bed_mapping = args.bed_mapping
chrom_sizes = args.chrom_sizes

chr_file_n = "chrom.sizes"

mapping_bed = mapping.MappingInfo(bed_mapping)

# Forward motion: header-less bed file, converted to a pybedtools object
# taken from the ('chr1', '.') entry of the converted tracks.
forward = intervals.IntData(
    forward_file,
    map_dict=mapping_bed.correspondence,
    header=False,
    fields_names=['chrm', 'start', 'end', 'nature', 'value', 'strain', 'color'],
)
forward_read = forward.read(relative_coord=False)
forward_bed_obj = forward_read.convert(mode="bed")['chr1', '.'].create_pybedtools()

# Backward motion: same layout as the forward file.
backward = intervals.IntData(
    backward_file,
    map_dict=mapping_bed.correspondence,
    header=False,
    fields_names=['chrm', 'start', 'end', 'nature', 'value', 'strain', 'color'],
)
backward_read = backward.read(relative_coord=False)
# Command line arguments
chase_score_f = args.score_file
mappings_jaaba = mapping.MappingInfo(args.mapping_file)
tag_group = args.tag_group
annotated_behavior = args.behavior

# A temporary csv file provides both the directory and the base name that
# the jaaba parser is asked to write to.
tmp_track = NamedTemporaryFile(prefix='jaaba_csv', suffix='.csv', delete=True)
tmp_full_name = tmp_track.name
name_tmp = splitext(basename(tmp_full_name))[0]
path_out = dirname(abspath(tmp_full_name))

jaaba_parsers.jaaba_scores_to_csv(
    input_file=chase_score_f,
    path_w=path_out,
    name_file=name_tmp,
    norm=True,
    data_type=annotated_behavior,
)

scores_chase_int = intervals.IntData(
    tmp_full_name,
    map_dict=mappings_jaaba.correspondence,
).read()

# The "pBDPGAL4" group is drawn in red, any other group in blue.
data_type_col = {
    annotated_behavior: 'red' if tag_group == "pBDPGAL4" else 'blue'
}

dict_bed_annotated_int = scores_chase_int.convert(
    mode="bed", color_restrictions=data_type_col)

# Chromosome files for the read data
chr_file_n = "chrom"
mapping.write_chr_sizes(scores_chase_int, file_n=chr_file_n)
mapping.write_chr(scores_chase_int)
chr_file = chr_file_n + ".sizes"
### the above described operations.                          ###
################################################################

import pybedtools
from os import path, getcwd

from pergola import mapping
from pergola import intervals

# Paths are resolved relative to the parent of the current working directory.
base_dir = path.dirname(getcwd())
out_dir = base_dir + "/test/"

mapping_data = mapping.MappingInfo(
    base_dir + "/sample_data/feeding_behavior/b2g.txt")

int_data = intervals.IntData(
    base_dir + "/sample_data/feeding_behavior/feedingBehavior_HF_mice.csv",
    map_dict=mapping_data.correspondence)

data_read = int_data.read(relative_coord=True)

###################
# Generate to BedTool objects containing light and dark phases

# Write phases file covering the whole (relative) time span of the data.
# delta=43200 is presumably the phase length in seconds (12 h) -- TODO confirm.
time_span = int_data.max - int_data.min
mapping.write_cytoband(end=time_span, delta=43200, start_phase="dark")

# NOTE(review): the phase files are looked up in out_dir; confirm that
# write_cytoband writes them there.
light_ph_f = out_dir + "phases_light.bed"
dark_ph_f = out_dir + "phases_dark.bed"

light_bed = pybedtools.BedTool(light_ph_f)
tag_file = "mean_speed_i_motionDir"

# Report the inputs on stderr
print >> stderr, "Bed speed file: %s" % args.phenotypic_file
print >> stderr, "Mapping bed to Pergola file: %s" % args.bed_mapping_file
print >> stderr, "Output tag file: %s" % tag_file

mapping_bed = mapping.MappingInfo(args.bed_mapping_file)

bed_ph_file = args.phenotypic_file

# Header-less bed file holding the phenotypic feature
int_data_phenotypic = intervals.IntData(
    bed_ph_file,
    map_dict=mapping_bed.correspondence,
    header=False,
    fields_names=['chrm', 'start', 'end', 'nature', 'value', 'strain', 'color'],
)

phenotypic_data_read = int_data_phenotypic.read(relative_coord=False)

bed_obj_phenotypic = phenotypic_data_read.convert(mode="bed")

# Only the first track returned by convert is used.
key_s = bed_obj_phenotypic.keys()[0]

phenotypic_feature_bt = bed_obj_phenotypic[key_s].create_pybedtools()

### Getting mean value of the intervals of the file containing the phenotypic feature:
## Generates a bed file of a single interval of the size of the whole bed file
list_full_length = [
    (
        phenotypic_feature_bt[0]["chrom"],
        phenotypic_feature_bt[0]["start"],
        phenotypic_feature_bt[phenotypic_feature_bt.count() - 1]["end"],
        0,
    )
]
#!/usr/bin/env python
# Debugging helper: loads the feeding behavior sample data through its
# mapping file and prints the attributes of the resulting objects.

from pergola import mapping
from pergola import intervals
from pergola import parsers

# NOTE(review): both input paths are absolute paths on one developer's
# machine; the script only runs elsewhere after adjusting them.
mapping_info = mapping.MappingInfo(
    "/Users/jespinosa/git/pergola/sample_data/feeding_behavior/b2g.txt")

mapping_info.write()

# load the data into an IntData object that will store the sequence of events
int_data = intervals.IntData(
    "/Users/jespinosa/git/pergola/sample_data/feeding_behavior/feedingBehavior_HF_mice.csv",
    map_dict=mapping_info.correspondence,
    delimiter="\t")

print(int_data.data[:12])
print(int_data.fieldsB)
print(int_data.fieldsG_dict)
# print (int_data.fieldsG_dict.keys())
print(int_data.dataTypes)
print(int_data.tracks)

# The two-item print statements are written as a single formatted string so
# that every print in the script uses the call form: the output is the same
# text on Python 2 and the syntax is also valid on Python 3.
print("debugging_simple_read---- min value in int_data_feeding ------------------------ %s" % (int_data.min,))
print("debugging_simple_read---- max value in int_data_feeding ------------------------ %s" % (int_data.max,))

track_data = int_data.read(relative_coord=True)

print("track object files are============+++++++++++++++++++ %s" % (track_data.fields,))

track_data.convert(mode="bed")

#electro for single time point validation