def run_prepare(*data):
    """
    Run seqcluster prepare to merge all samples in one file.

    Each positional argument is a sequence whose first element is the
    sample dictionary; that dictionary must provide ``["rgnames"]["sample"]``
    and ``["collapse"]``.

    The merged outputs ``seqs.ma`` and ``seqs.fastq`` are written under
    ``<work_dir>/seqcluster/prepare`` unless ``seqs.ma`` already exists.
    Their paths are stored on every sample under ``seqcluster_prepare_ma``
    and ``seqcluster_prepare_fastq``.

    Returns the same ``data`` tuple that was passed in.
    """
    first_sample = data[0][0]
    out_dir = os.path.join(dd.get_work_dir(first_sample), "seqcluster", "prepare")
    out_dir = os.path.abspath(safe_makedir(out_dir))
    tools = dd.get_expression_caller(first_sample)
    if not tools:
        # Informational only: the prepare step still runs without callers.
        logger.info("You didn't specify any other expression caller tool. "
                    "You can add to the YAML file: "
                    "expression_caller:[trna, seqcluster, mirdeep2]")
    # One "<collapsed file>\t<sample name>" entry per sample.
    fn = []
    for sample in data:
        name = sample[0]["rgnames"]['sample']
        fn.append("%s\t%s" % (sample[0]['collapse'], name))
    args = namedtuple('args', 'debug print_debug minc minl maxl out')
    # debug=False, print_debug=False, minc=2, minl=17, maxl=40
    args = args(False, False, 2, 17, 40, out_dir)
    ma_out = op.join(out_dir, "seqs.ma")
    seq_out = op.join(out_dir, "seqs.fastq")
    # One tenth of the number of samples, but never less than 1.
    min_shared = max(int(len(fn) / 10.0), 1)
    if not file_exists(ma_out):
        seq_l, sample_l = prepare._read_fastq_files(fn, args)
        with file_transaction(ma_out) as ma_tx:
            with open(ma_tx, 'w') as ma_handle, open(seq_out, 'w') as seq_handle:
                prepare._create_matrix_uniq_seq(sample_l, seq_l, ma_handle,
                                                seq_handle, min_shared)
    for sample in data:
        sample[0]["seqcluster_prepare_ma"] = ma_out
        sample[0]["seqcluster_prepare_fastq"] = seq_out
    return data
def run_prepare(*data):
    """
    Run seqcluster prepare to merge all samples in one file.

    Each positional argument is a sequence whose first element is the
    sample dictionary; that dictionary must provide ``["rgnames"]["sample"]``
    and ``["collapse"]``.

    ``seqs.ma`` and ``seqs.fastq`` are generated under
    ``<work_dir>/seqcluster/prepare`` only when ``seqs.ma`` is missing.
    Both paths are then recorded on every sample
    (``seqcluster_prepare_ma`` / ``seqcluster_prepare_fastq``).

    Returns the same ``data`` tuple that was passed in.
    """
    first_sample = data[0][0]
    out_dir = os.path.join(dd.get_work_dir(first_sample), "seqcluster", "prepare")
    out_dir = os.path.abspath(safe_makedir(out_dir))
    tools = dd.get_expression_caller(first_sample)
    if not tools:
        # Informational only: the prepare step still runs without callers.
        logger.info("You didn't specify any other expression caller tool. "
                    "You can add to the YAML file: "
                    "expression_caller:[trna, seqcluster, mirdeep2]")
    # One "<collapsed file>\t<sample name>" entry per sample.
    fn = []
    for sample in data:
        name = sample[0]["rgnames"]['sample']
        fn.append("%s\t%s" % (sample[0]['collapse'], name))
    args = namedtuple('args', 'debug print_debug minc minl maxl out')
    # debug=False, print_debug=False, minc=2, minl=17, maxl=40
    args = args(False, False, 2, 17, 40, out_dir)
    ma_out = op.join(out_dir, "seqs.ma")
    seq_out = op.join(out_dir, "seqs.fastq")
    # One tenth of the number of samples, but never less than 1.
    min_shared = max(int(len(fn) / 10.0), 1)
    if not file_exists(ma_out):
        seq_l, sample_l = prepare._read_fastq_files(fn, args)
        with file_transaction(ma_out) as ma_tx:
            with open(ma_tx, 'w') as ma_handle, open(seq_out, 'w') as seq_handle:
                logger.info("Prepare seqs.fastq with -minl 17 -maxl 40 -minc 2 --min_shared 0.1")
                prepare._create_matrix_uniq_seq(sample_l, seq_l, ma_handle,
                                                seq_handle, min_shared)
    for sample in data:
        sample[0]["seqcluster_prepare_ma"] = ma_out
        sample[0]["seqcluster_prepare_fastq"] = seq_out
    return data
def run_prepare(*data):
    """
    Run seqcluster prepare to merge all samples in one file.

    Each positional argument is a sequence whose first element is the
    sample dictionary; that dictionary must provide ``["rgnames"]["sample"]``
    and ``["collapse"]``.

    Writes ``seqs.ma`` and ``seqs.fastq`` under
    ``<work_dir>/seqcluster/prepare`` unless ``seqs.ma`` already exists.

    Returns the same ``data`` tuple that was passed in, unmodified.
    """
    out_dir = os.path.join(dd.get_work_dir(data[0][0]), "seqcluster", "prepare")
    out_dir = os.path.abspath(safe_makedir(out_dir))
    # One "<collapsed file>\t<sample name>" entry per sample.
    fn = ["%s\t%s" % (sample[0]['collapse'], sample[0]["rgnames"]['sample'])
          for sample in data]
    args = namedtuple('args', 'debug print_debug minc minl maxl out')
    # debug=False, print_debug=False, minc=2, minl=17, maxl=40
    args = args(False, False, 2, 17, 40, out_dir)
    ma_out = op.join(out_dir, "seqs.ma")
    seq_out = op.join(out_dir, "seqs.fastq")
    # One tenth of the number of samples, but never less than 1.
    min_shared = max(int(len(fn) / 10.0), 1)
    if not file_exists(ma_out):
        seq_l, sample_l = prepare._read_fastq_files(fn, args)
        with file_transaction(ma_out) as ma_tx:
            with open(ma_tx, 'w') as ma_handle, open(seq_out, 'w') as seq_handle:
                prepare._create_matrix_uniq_seq(sample_l, seq_l, ma_handle,
                                                seq_handle, min_shared)
    return data
def run_prepare(data):
    """
    Run seqcluster prepare to merge all samples in one file.

    ``data`` is a sequence of sample dictionaries, each providing
    ``["rgnames"]["sample"]`` and ``["collapse"]``.

    Writes ``seqs.ma`` and ``seqs.fastq`` under
    ``<work_dir>/seqcluster/prepare`` unless ``seqs.ma`` already exists.

    Returns ``[data]``, i.e. the input wrapped in a one-element list.
    """
    out_dir = os.path.join(dd.get_work_dir(data[0]), "seqcluster", "prepare")
    out_dir = os.path.abspath(safe_makedir(out_dir))
    # One "<collapsed file>\t<sample name>" entry per sample.
    fn = ["%s\t%s" % (sample['collapse'], sample["rgnames"]['sample'])
          for sample in data]
    args = namedtuple('args', 'debug print_debug minc minl maxl out')
    # debug=False, print_debug=False, minc=1, minl=17, maxl=40
    args = args(False, False, 1, 17, 40, out_dir)
    ma_out = op.join(out_dir, "seqs.ma")
    seq_out = op.join(out_dir, "seqs.fastq")
    # One tenth of the number of samples, but never less than 1.
    min_shared = max(int(len(fn) / 10.0), 1)
    if not file_exists(ma_out):
        # Only read the inputs when the matrix has to be built; the parsed
        # sequences are not used for anything else in this function.
        seq_l, sample_l = prepare._read_fastq_files(fn, args)
        with file_transaction(ma_out) as ma_tx:
            with open(ma_tx, 'w') as ma_handle, open(seq_out, 'w') as seq_handle:
                prepare._create_matrix_uniq_seq(sample_l, seq_l, ma_handle,
                                                seq_handle, min_shared)
    return [data]
def test_preparedata(self):
    """
    Check that the prepare step produces ``seqs.ma`` and ``seqs.fa``.

    Reads ``data/examples/collapse/config`` (relative to the current
    working directory) and writes into ``test/test_out_prepare``, which is
    recreated before the run and removed afterwards.
    """
    out_dir = "test/test_out_prepare"
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)
    try:
        arg = namedtuple('args', 'minl maxl minc out')
        args = arg(15, 40, 1, out_dir)
        ma_path = os.path.join(out_dir, "seqs.ma")
        seq_path = os.path.join(out_dir, "seqs.fa")
        with open("data/examples/collapse/config") as config_handle:
            seq_l, list_s = _read_fastq_files(config_handle, args)
        # Close both writers before asserting so the outputs are flushed.
        with open(ma_path, 'w') as ma_out, open(seq_path, 'w') as seq_out:
            _create_matrix_uniq_seq(list_s, seq_l, ma_out, seq_out, 1)
        self.assertTrue(os.path.exists(ma_path))
        self.assertTrue(os.path.exists(seq_path))
    finally:
        # Clean up even when an assertion or the prepare step fails.
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
def test_preparedata(self):
    """
    Check that the prepare step produces ``seqs.ma`` and ``seqs.fa``.

    Changes the working directory to ``data/test_collapse`` under the
    directory derived from the ``seqcluster`` module location, removes any
    previous outputs, and regenerates them from the ``config`` file there.
    The working directory is left changed, as in the original test.
    """
    mod_dir = os.path.dirname(inspect.getfile(seqcluster)).replace("seqcluster/", "")
    os.chdir(os.path.join(mod_dir, "data/test_collapse"))
    # Start from a clean slate so the assertions reflect this run.
    for stale in ("seqs.ma", "seqs.fa"):
        if os.path.exists(stale):
            os.remove(stale)
    arg = namedtuple('args', 'minl maxl minc')
    args = arg(15, 40, 1)
    with open("config") as config_handle:
        seq_l, list_s = _read_fastq_files(config_handle, args)
    # Close both writers before asserting so the outputs are flushed.
    with open("seqs.ma", 'w') as ma_out, open("seqs.fa", 'w') as seq_out:
        _create_matrix_uniq_seq(list_s, seq_l, ma_out, seq_out, 1)
    self.assertTrue(os.path.exists("seqs.ma"))
    self.assertTrue(os.path.exists("seqs.fa"))
def test_preparedata(self):
    """
    Check that the prepare step produces ``seqs.ma`` and ``seqs.fa``.

    Input is ``data/examples/collapse/config`` (relative to the current
    working directory); output goes to ``test/test_out_prepare``, which is
    recreated for the run and deleted when the test finishes.
    """
    out_dir = "test/test_out_prepare"
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)
    ma_path = os.path.join(out_dir, "seqs.ma")
    seq_path = os.path.join(out_dir, "seqs.fa")
    try:
        arg = namedtuple('args', 'minl maxl minc out')
        args = arg(15, 40, 1, out_dir)
        with open("data/examples/collapse/config") as config_handle:
            seq_l, list_s = _read_fastq_files(config_handle, args)
        # Close both writers before asserting so the outputs are flushed.
        with open(ma_path, 'w') as ma_out, open(seq_path, 'w') as seq_out:
            _create_matrix_uniq_seq(list_s, seq_l, ma_out, seq_out, 1)
        self.assertTrue(os.path.exists(ma_path))
        self.assertTrue(os.path.exists(seq_path))
    finally:
        # Clean up even when an assertion or the prepare step fails.
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
def test_preparedata(self):
    """
    Check that the prepare step produces ``seqs.ma`` and ``seqs.fa``.

    Changes the working directory to ``data/examples/collapse`` under the
    directory derived from the ``seqcluster`` module location, recreates
    ``test_out_prepare`` there, and builds the outputs from ``config``.
    The test finishes with the working directory set to the output
    directory, as in the original test.
    """
    mod_dir = os.path.dirname(inspect.getfile(seqcluster)).replace("seqcluster/", "")
    os.chdir(os.path.join(mod_dir, "data/examples/collapse"))
    out_dir = "test_out_prepare"
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)
    arg = namedtuple('args', 'minl maxl minc out')
    args = arg(15, 40, 1, out_dir)
    with open("config") as config_handle:
        seq_l, list_s = _read_fastq_files(config_handle, args)
    # Close both writers before asserting so the outputs are flushed.
    with open(os.path.join(out_dir, "seqs.ma"), 'w') as ma_out, \
            open(os.path.join(out_dir, "seqs.fa"), 'w') as seq_out:
        _create_matrix_uniq_seq(list_s, seq_l, ma_out, seq_out, 1)
    os.chdir(out_dir)
    self.assertTrue(os.path.exists("seqs.ma"))
    self.assertTrue(os.path.exists("seqs.fa"))
def test_preparedata(self):
    """
    Check that the prepare step produces ``seqs.ma`` and ``seqs.fa``.

    Works inside ``data/examples/collapse`` under the directory derived
    from the ``seqcluster`` module location: ``test_out_prepare`` is
    recreated there and filled from the ``config`` file. The working
    directory ends up inside the output directory, as in the original test.
    """
    mod_dir = os.path.dirname(inspect.getfile(seqcluster)).replace(
        "seqcluster/", "")
    os.chdir(os.path.join(mod_dir, "data/examples/collapse"))
    out_dir = "test_out_prepare"
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)
    arg = namedtuple('args', 'minl maxl minc out')
    args = arg(15, 40, 1, out_dir)
    ma_path = os.path.join(out_dir, "seqs.ma")
    seq_path = os.path.join(out_dir, "seqs.fa")
    with open("config") as config_handle:
        seq_l, list_s = _read_fastq_files(config_handle, args)
    # Close both writers before asserting so the outputs are flushed.
    with open(ma_path, 'w') as ma_out, open(seq_path, 'w') as seq_out:
        _create_matrix_uniq_seq(list_s, seq_l, ma_out, seq_out, 1)
    os.chdir(out_dir)
    self.assertTrue(os.path.exists("seqs.ma"))
    self.assertTrue(os.path.exists("seqs.fa"))
# Merge comma-separated sample files into seqs.ma / seqs.fastq in the
# current directory, skipping the work when seqs.ma already exists.
files = sys.argv[2]
out_dir = "./"
args = namedtuple('args', 'debug print_debug minc minl maxl out')
# debug=False, print_debug=False, minc=2, minl=17, maxl=40
args = args(False, False, 2, 17, 40, out_dir)
ma_out = op.join(out_dir, "seqs.ma")
seq_out = op.join(out_dir, "seqs.fastq")
# NOTE(review): `ids` is not assigned in the visible part of this script;
# presumably it is read from sys.argv[1] earlier -- confirm.
arr_ids = ids.split(',')
arr_files = files.split(',')
# One "<file>\t<id>" entry per (id, file) pair.
fn = ["%s\t%s" % (file_, id_) for id_, file_ in zip(arr_ids, arr_files)]
# Must be computed after `fn` is filled: one tenth of the number of
# samples, but never less than 1.
min_shared = max(int(len(fn) / 10.0), 1)
if not file_exists(ma_out):
    seq_l, sample_l = prepare._read_fastq_files(fn, args)
    with file_transaction(ma_out) as ma_tx:
        with open(ma_tx, 'w') as ma_handle, open(seq_out, 'w') as seq_handle:
            prepare._create_matrix_uniq_seq(sample_l, seq_l, ma_handle,
                                            seq_handle, min_shared)