Esempio n. 1
0
def run_bedmap(doe_csv, header_name_of_exp_id, header_names_of_factors, in_map_dir, parent, extension, out_dir):
    ''' this function reads the per-experiment map files listed in doe_csv
    in lockstep (one line from every file at a time), passes each tuple of
    lines to average() together with the factor-group index of every file,
    and writes the returned lines to one bedgraph file per distinct factor
    combination.

    doe_csv: fullname of the design-of-experiment csv
    header_name_of_exp_id: csv header of the experiment id column
    header_names_of_factors: comma separated csv headers of the factor columns
    in_map_dir: directory holding the input files, named
        <exp_id>.<parent><extension>
    parent: tag used in both the input and the output file names
    extension: trailing part of the input file names
    out_dir: directory receiving the <factors>.<parent>.bedgraph files,
        where <factors> is the ':'-joined factor values of a group

    Returns 0.
    Assumption: doe_reader.read_experiment_fields returns
    {'exp_id': [factor values]} with the same exp_ids as
    doe_reader.create_experiment_fns; a missing exp_id raises KeyError here.
    Assumption: average() returns one line per factor group, ordered by
    group index.
    '''
    bed_map_fn_dict = doe_reader.create_experiment_fns(doe_csv, header_name_of_exp_id, in_map_dir, '.'+parent+extension)
    factors_dict = doe_reader.read_experiment_fields(doe_csv, header_name_of_exp_id, header_names_of_factors.split(','))

    # A single ordered list of experiment ids drives both the input files and
    # their group indices, so position i always refers to the same experiment
    # instead of relying on two dicts iterating in the same order.
    exp_ids = list(factors_dict)
    joined_factors = [':'.join(factors_dict[exp_id]) for exp_id in exp_ids]

    # Sorted list (rather than a bare set) so that group indices and the
    # output files are in a well-defined, reproducible order.
    unique_factors = sorted(set(joined_factors))
    factors_name_corresponding_set = [
        general.index_in_unique_list(unique_factors, factors)
        for factors in joined_factors]

    files = []
    out_files = []
    try:
        for exp_id in exp_ids:
            files.append(open(bed_map_fn_dict[exp_id]))
        for factor in unique_factors:
            out_files.append(open(os.path.join(out_dir, factor+'.%s.bedgraph' %(parent)), 'w'))
        for lines in izip(*files):
            lines_to_print = average(lines, factors_name_corresponding_set)
            for f_index, line_to_print in enumerate(lines_to_print):
                out_files[f_index].write(line_to_print)
    finally:
        # Close whatever was opened, even when opening or averaging failed.
        close_lst_of_fhs(files)
        close_lst_of_fhs(out_files)
    return 0
Esempio n. 2
0
def read_experiment_field(doe_csv, header_name_of_exp_id, header_name):
    ''' this function opens the csv file doe_csv, treats its first row as
    the header, and returns a dictionary {'exp_id':'field_value'} where
    exp_id comes from the column named header_name_of_exp_id and
    field_value from the column named header_name.
    Assumption: unique exp_id (a repeated exp_id keeps the value of its
    last row)
    '''
    with open(doe_csv,'rb') as doe_handle:
        rows = csv.reader(doe_handle)
        header_row = next(rows)
        id_col = general.index_in_unique_list(header_row, header_name_of_exp_id)
        value_col = general.index_in_unique_list(header_row, header_name)
        # Every remaining row contributes one exp_id -> value pair.
        return dict((row[id_col], row[value_col]) for row in rows)
Esempio n. 3
0
def read_experiment_field(doe_csv, header_name_of_exp_id, header_name):
    ''' this function reads the csv file doe_csv and builds a dictionary
    {'exp_id':'field_value'}: the key is taken from the column whose header
    is header_name_of_exp_id, the value from the column whose header is
    header_name. The first row of the file is the header row.
    Assumption: unique exp_id (later rows overwrite earlier ones otherwise)
    '''
    with open(doe_csv, 'rb') as csvfile:
        rows = csv.reader(csvfile)
        column_names = next(rows)
        # Resolve both column positions once, before walking the data rows.
        key_col, value_col = [
            general.index_in_unique_list(column_names, wanted)
            for wanted in (header_name_of_exp_id, header_name)]
        pairs = [(row[key_col], row[value_col]) for row in rows]
    return dict(pairs)
Esempio n. 4
0
def create_experiment_fns(doe_csv, header_name_of_exp_id, in_dir, extension_name):
    ''' this function opens the csv file doe_csv, treats its first row as
    the header, and returns a dictionary {'exp_id':'fullname_to_exp_file'}
    where the fullname is in_dir joined with exp_id+extension_name.
    Assumption: unique exp_id, and all files in the same dir with the same extension
    '''
    with open(doe_csv,'rb') as doe_handle:
        rows = csv.reader(doe_handle)
        id_col = general.index_in_unique_list(next(rows), header_name_of_exp_id)
        # One entry per data row: exp_id -> <in_dir>/<exp_id><extension_name>
        return dict(
            (row[id_col], os.path.join(in_dir, row[id_col]+extension_name))
            for row in rows)
Esempio n. 5
0
def create_experiment_fns(doe_csv, header_name_of_exp_id, in_dir,
                          extension_name):
    ''' this function reads the exp_id column (header header_name_of_exp_id)
    of the csv file doe_csv and returns a dictionary
    {'exp_id':'fullname_to_exp_file'}, each fullname being
    os.path.join(in_dir, exp_id + extension_name).
    Assumption: unique exp_id, and all files in the same dir with the same extension
    '''
    fullname_by_exp_id = {}
    with open(doe_csv, 'rb') as csvfile:
        rows = csv.reader(csvfile)
        column_names = next(rows)
        id_col = general.index_in_unique_list(
            column_names, header_name_of_exp_id)
        for row in rows:
            fullname_by_exp_id[row[id_col]] = os.path.join(
                in_dir, row[id_col] + extension_name)
    return fullname_by_exp_id
Esempio n. 6
0
def run_trim_galore(logs_dir, out_dir, doe_csv_fn, header_name_of_in_fn, header_name_of_adapter_seq, header_name_of_read_length, trim_galore_options, rm_shorter_than_space, tissue):
    ''' this function builds, prints, records and runs one bsub-wrapped
    trim_galore command per experiment listed in doe_csv_fn.

    The input file, adapter sequence and read length of every experiment
    are read from the csv columns named by the three header_name_of_*
    arguments, keyed by the 'name' column. Each command receives
    trim_galore_options extended with ' --length N', where
    N = read_length - clip_R1 value - rm_shorter_than_space and the clip_R1
    value is the token following '--clip_R1' in trim_galore_options.

    Files in out_dir are removed (via myos) before any command is run.
    Returns 0.
    Assumption: trim_galore_options contains '--clip_R1 <int>' separated by
    single spaces, and rm_shorter_than_space is a number.
    '''
    queue = 'regevlab'
    memory = '5000'

    option_tokens = trim_galore_options.split(' ')
    clip_token = option_tokens[general.index_in_unique_list(option_tokens, '--clip_R1')+1]
    print(clip_token)
    clip_r1 = int(clip_token)

    # Same three csv lookups as before, in the same order.
    fastq_by_exp, adapter_by_exp, read_length_by_exp = [
        doe_reader.read_experiment_field(doe_csv_fn, 'name', header)
        for header in (header_name_of_in_fn, header_name_of_adapter_seq, header_name_of_read_length)]

    myos.remove_all_files_given_dir(out_dir)
    myos.check_if_directory_exists_create_it(out_dir)

    for exp_name, in_fn in fastq_by_exp.iteritems():
        adapter = adapter_by_exp[exp_name]
        full_read_length = int(read_length_by_exp[exp_name])
        bsub_prefix = myos.create_bsub_string_no_rm_logs_dir(logs_dir, exp_name+'_'+tissue, qname = queue, mem_usage = memory)
        min_length = full_read_length-clip_r1-rm_shorter_than_space
        trim_cmd = execs_commands.trim_galore_filter(adapter, trim_galore_options+' --length %s' %(min_length), in_fn, out_dir)
        # The trim_galore command is passed to bsub as one quoted argument.
        fullcmd = bsub_prefix+'\"'+trim_cmd+'\"'
        print(fullcmd)
        myos.write_fullcmd(fullcmd, logs_dir, exp_name+'_'+tissue)
        os.system(fullcmd)
    return 0