def run_bedmap(doe_csv, header_name_of_exp_id, header_names_of_factors,
               in_map_dir, parent, extension, out_dir):
    '''
    Combine per-experiment map files into one bedgraph file per factor
    combination.

    Every experiment listed in doe_csv gets a group label made of its factor
    values joined with ':'. The experiments' input files are read in lockstep,
    one line from each at a time; each tuple of lines is handed to average()
    together with the group index of every experiment, and the lines it
    returns are written, by position, to the output files
    <out_dir>/<group label>.<parent>.bedgraph.

    doe_csv: fullname of the DOE csv file
    header_name_of_exp_id: header of the column holding the experiment ids
    header_names_of_factors: comma separated headers of the factor columns
    in_map_dir: directory holding the input files, named
        <exp_id>.<parent><extension>
    parent, extension: pieces of the input file names (see in_map_dir);
        parent is also used in the output file names
    out_dir: directory the bedgraph files are written to

    Returns 0.
    Raises KeyError if an experiment that has factors has no input file entry.
    '''
    bed_map_fn_dict = doe_reader.create_experiment_fns(
        doe_csv, header_name_of_exp_id, in_map_dir, '.' + parent + extension)
    factors_dict = doe_reader.read_experiment_fields(
        doe_csv, header_name_of_exp_id, header_names_of_factors.split(','))

    # One explicit experiment order drives both the group indices and the
    # order in which input files are opened, so position i always refers to
    # the same experiment in both lists (instead of relying on two separate
    # dictionaries happening to iterate in the same order).
    exp_names = list(factors_dict)
    labels = [':'.join(factors_dict[name]) for name in exp_names]
    factors_set = set(labels)
    factors_name_corresponding_set = [
        general.index_in_unique_list(factors_set, label) for label in labels]

    files = []
    out_files = []
    try:
        for name in exp_names:
            files.append(open(bed_map_fn_dict[name]))
        for factor in factors_set:
            out_fn = os.path.join(out_dir, factor + '.%s.bedgraph' % (parent))
            out_files.append(open(out_fn, 'w'))

        # izip stops at the shortest input file.
        for lines in izip(*files):
            # NOTE(review): average() is assumed to return one line per group,
            # ordered by group index -- confirm against its definition.
            lines_to_print = average(lines, factors_name_corresponding_set)
            for f_index, line_to_print in enumerate(lines_to_print):
                out_files[f_index].write(line_to_print)
    finally:
        # Close whatever was opened, even if opening or averaging failed.
        close_lst_of_fhs(files)
        close_lst_of_fhs(out_files)
    return 0
def read_experiment_field(doe_csv, header_name_of_exp_id, header_name):
    '''
    this function takes a doe_csv fullname, opens the file, and returns a
    dictionary {'exp_id':'field_value'}

    The first row of the file is used as the header row; the columns named
    header_name_of_exp_id and header_name are located in it with
    general.index_in_unique_list. Blank lines in the file are skipped.

    Assumption: unique exp_id (a repeated exp_id keeps the value of its
    last row)

    Raises ValueError if the file has no header row.
    '''
    field_dict = {}
    with open(doe_csv, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        # A default for next() lets an empty file be reported with a clear
        # error instead of leaking a bare StopIteration to the caller.
        headers = next(reader, None)
        if headers is None:
            raise ValueError('no header row found in %s' % doe_csv)
        header_index_of_exp_id = general.index_in_unique_list(
            headers, header_name_of_exp_id)
        header_index_of_header_name = general.index_in_unique_list(
            headers, header_name)
        for row in reader:
            # csv.reader yields an empty list for a blank line (commonly a
            # trailing one); indexing it would raise IndexError.
            if not row:
                continue
            field_dict[row[header_index_of_exp_id]] = \
                row[header_index_of_header_name]
    return field_dict
def read_experiment_field(doe_csv, header_name_of_exp_id, header_name):
    '''
    this function takes a doe_csv fullname, opens the file, and returns a
    dictionary {'exp_id':'field_value'}

    The first row of the file is used as the header row; the columns named
    header_name_of_exp_id and header_name are located in it with
    general.index_in_unique_list. Blank lines in the file are skipped.

    Assumption: unique exp_id (a repeated exp_id keeps the value of its
    last row)

    Raises ValueError if the file has no header row.
    '''
    field_dict = {}
    with open(doe_csv, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        # A default for next() lets an empty file be reported with a clear
        # error instead of leaking a bare StopIteration to the caller.
        headers = next(reader, None)
        if headers is None:
            raise ValueError('no header row found in %s' % doe_csv)
        header_index_of_exp_id = general.index_in_unique_list(
            headers, header_name_of_exp_id)
        header_index_of_header_name = general.index_in_unique_list(
            headers, header_name)
        for row in reader:
            # csv.reader yields an empty list for a blank line (commonly a
            # trailing one); indexing it would raise IndexError.
            if not row:
                continue
            field_dict[row[header_index_of_exp_id]] = \
                row[header_index_of_header_name]
    return field_dict
def create_experiment_fns(doe_csv, header_name_of_exp_id, in_dir,
                          extension_name):
    '''
    this function takes a doe_csv fullname, opens the file, and returns a
    dictionary {'exp_id':'fullname_to_exp_file'}

    Each fullname is os.path.join(in_dir, exp_id + extension_name); the files
    themselves are not opened or checked for existence. The first row of the
    csv is used as the header row and blank lines are skipped.

    Assumption: unique exp_id, and all files in the same dir with the same
    extension

    Raises ValueError if the file has no header row.
    '''
    field_dict = {}
    with open(doe_csv, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        # A default for next() lets an empty file be reported with a clear
        # error instead of leaking a bare StopIteration to the caller.
        headers = next(reader, None)
        if headers is None:
            raise ValueError('no header row found in %s' % doe_csv)
        header_index_of_exp_id = general.index_in_unique_list(
            headers, header_name_of_exp_id)
        for row in reader:
            # csv.reader yields an empty list for a blank line (commonly a
            # trailing one); indexing it would raise IndexError.
            if not row:
                continue
            exp_id = row[header_index_of_exp_id]
            field_dict[exp_id] = os.path.join(in_dir, exp_id + extension_name)
    return field_dict
def create_experiment_fns(doe_csv, header_name_of_exp_id, in_dir,
                          extension_name):
    '''
    this function takes a doe_csv fullname, opens the file, and returns a
    dictionary {'exp_id':'fullname_to_exp_file'}

    Each fullname is os.path.join(in_dir, exp_id + extension_name); the files
    themselves are not opened or checked for existence. The first row of the
    csv is used as the header row and blank lines are skipped.

    Assumption: unique exp_id, and all files in the same dir with the same
    extension

    Raises ValueError if the file has no header row.
    '''
    field_dict = {}
    with open(doe_csv, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        # A default for next() lets an empty file be reported with a clear
        # error instead of leaking a bare StopIteration to the caller.
        headers = next(reader, None)
        if headers is None:
            raise ValueError('no header row found in %s' % doe_csv)
        header_index_of_exp_id = general.index_in_unique_list(
            headers, header_name_of_exp_id)
        for row in reader:
            # csv.reader yields an empty list for a blank line (commonly a
            # trailing one); indexing it would raise IndexError.
            if not row:
                continue
            exp_id = row[header_index_of_exp_id]
            field_dict[exp_id] = os.path.join(in_dir, exp_id + extension_name)
    return field_dict
def run_trim_galore(logs_dir, out_dir, doe_csv_fn, header_name_of_in_fn,
                    header_name_of_adapter_seq, header_name_of_read_length,
                    trim_galore_options, rm_shorter_than_space, tissue,
                    qname='regevlab', mem_usage='5000'):
    '''
    Build and submit one bsub-wrapped trim_galore command per experiment
    listed in a DOE csv file.

    For every experiment the input file, adapter sequence and read length are
    looked up in doe_csv_fn (experiments are keyed by the 'name' column). The
    command is built from trim_galore_options plus a ' --length' option set to
    read_length - <--clip_R1 value> - rm_shorter_than_space, wrapped in a bsub
    string, printed, recorded with myos.write_fullcmd and run with os.system.

    logs_dir: directory passed to the bsub string builder and to
        myos.write_fullcmd
    out_dir: output directory handed to trim_galore_filter
    doe_csv_fn: fullname of the DOE csv file
    header_name_of_in_fn / header_name_of_adapter_seq /
    header_name_of_read_length: headers of the csv columns holding the input
        file name, the adapter sequence and the read length
    trim_galore_options: option string; must contain '--clip_R1 <int>'
    rm_shorter_than_space: number subtracted from the length threshold
    tissue: suffix appended to every job name (<experiment name>_<tissue>)
    qname: queue name passed to the bsub string builder
    mem_usage: memory request passed to the bsub string builder

    Returns 0. The exit status of os.system is not inspected.
    '''
    # split() without an argument collapses runs of whitespace, so doubled
    # spaces or tabs cannot put an empty token right after '--clip_R1'.
    trim_galore_options_list = trim_galore_options.split()
    clip_R1_value_index = general.index_in_unique_list(
        trim_galore_options_list, '--clip_R1') + 1
    print(trim_galore_options_list[clip_R1_value_index])
    clip_R1_value = int(trim_galore_options_list[clip_R1_value_index])

    dict_fq_fns = doe_reader.read_experiment_field(
        doe_csv_fn, 'name', header_name_of_in_fn)
    dict_adapter_seq = doe_reader.read_experiment_field(
        doe_csv_fn, 'name', header_name_of_adapter_seq)
    dict_read_length = doe_reader.read_experiment_field(
        doe_csv_fn, 'name', header_name_of_read_length)

    # NOTE(review): going by the helper names, out_dir is emptied and then
    # (re)created before any job is submitted -- confirm in myos.
    myos.remove_all_files_given_dir(out_dir)
    myos.check_if_directory_exists_create_it(out_dir)

    for exp_name, in_fn in dict_fq_fns.items():
        job_name = exp_name + '_' + tissue
        adapter_seq = dict_adapter_seq[exp_name]
        read_length = int(dict_read_length[exp_name])
        min_length = read_length - clip_R1_value - rm_shorter_than_space

        bsubcmd = myos.create_bsub_string_no_rm_logs_dir(
            logs_dir, job_name, qname=qname, mem_usage=mem_usage)
        # The caller's option string is forwarded untouched; only the
        # per-experiment length threshold is appended.
        runcmd_tgf = execs_commands.trim_galore_filter(
            adapter_seq, trim_galore_options + ' --length %s' % (min_length),
            in_fn, out_dir)
        fullcmd = bsubcmd + '\"' + runcmd_tgf + '\"'
        print(fullcmd)
        myos.write_fullcmd(fullcmd, logs_dir, job_name)
        os.system(fullcmd)
    return 0