Example #1
0
def run_prepare(*data):
    """
    Run seqcluster prepare to merge all samples in one file.

    Every positional argument is a sequence whose first element is a sample
    dictionary providing ``["rgnames"]["sample"]`` (sample name) and
    ``["collapse"]`` (path handed to seqcluster together with the name).

    The merged outputs ``seqs.ma`` and ``seqs.fastq`` are written under
    ``<work_dir>/seqcluster/prepare`` and are only generated when
    ``seqs.ma`` does not exist yet.

    Returns the same ``data`` tuple, with ``seqcluster_prepare_ma`` and
    ``seqcluster_prepare_fastq`` set on every sample dictionary.
    """
    out_dir = os.path.join(dd.get_work_dir(data[0][0]), "seqcluster", "prepare")
    out_dir = os.path.abspath(safe_makedir(out_dir))
    tools = dd.get_expression_caller(data[0][0])
    # Truthiness covers both an empty list and a missing (None) setting.
    if not tools:
        logger.info("You didn't specify any other expression caller tool. "
                    "You can add to the YAML file: "
                    "expression_caller:[trna, seqcluster, mirdeep2]")
    # One "<collapsed file>\t<sample name>" entry per sample.
    fn = []
    for sample in data:
        name = sample[0]["rgnames"]['sample']
        fn.append("%s\t%s" % (sample[0]['collapse'], name))
    prepare_args = namedtuple('args', 'debug print_debug minc minl maxl out')
    args = prepare_args(debug=False, print_debug=False, minc=2, minl=17,
                        maxl=40, out=out_dir)
    ma_out = op.join(out_dir, "seqs.ma")
    seq_out = op.join(out_dir, "seqs.fastq")
    # A tenth of the number of samples, but never less than 1.
    min_shared = max(int(len(fn) / 10.0), 1)
    if not file_exists(ma_out):
        seq_l, sample_l = prepare._read_fastq_files(fn, args)
        # Only seqs.ma goes through file_transaction; seqs.fastq is written
        # directly to its final path.
        with file_transaction(ma_out) as ma_tx:
            with open(ma_tx, 'w') as ma_handle, open(seq_out, 'w') as seq_handle:
                prepare._create_matrix_uniq_seq(sample_l, seq_l, ma_handle,
                                                seq_handle, min_shared)

    for sample in data:
        sample[0]["seqcluster_prepare_ma"] = ma_out
        sample[0]["seqcluster_prepare_fastq"] = seq_out
    return data
Example #2
0
def run_prepare(*data):
    """
    Run seqcluster prepare to merge all samples in one file.

    Every positional argument is a sequence whose first element is a sample
    dictionary providing ``["rgnames"]["sample"]`` (sample name) and
    ``["collapse"]`` (path handed to seqcluster together with the name).

    The merged outputs ``seqs.ma`` and ``seqs.fastq`` are written under
    ``<work_dir>/seqcluster/prepare`` and are only generated when
    ``seqs.ma`` does not exist yet.

    Returns the same ``data`` tuple, with ``seqcluster_prepare_ma`` and
    ``seqcluster_prepare_fastq`` set on every sample dictionary.
    """
    out_dir = os.path.join(dd.get_work_dir(data[0][0]), "seqcluster", "prepare")
    out_dir = os.path.abspath(safe_makedir(out_dir))
    tools = dd.get_expression_caller(data[0][0])
    # Truthiness covers both an empty list and a missing (None) setting.
    if not tools:
        logger.info("You didn't specify any other expression caller tool. "
                    "You can add to the YAML file: "
                    "expression_caller:[trna, seqcluster, mirdeep2]")
    # One "<collapsed file>\t<sample name>" entry per sample.
    fn = []
    for sample in data:
        name = sample[0]["rgnames"]['sample']
        fn.append("%s\t%s" % (sample[0]['collapse'], name))
    prepare_args = namedtuple('args', 'debug print_debug minc minl maxl out')
    args = prepare_args(debug=False, print_debug=False, minc=2, minl=17,
                        maxl=40, out=out_dir)
    ma_out = op.join(out_dir, "seqs.ma")
    seq_out = op.join(out_dir, "seqs.fastq")
    # A tenth of the number of samples, but never less than 1.
    min_shared = max(int(len(fn) / 10.0), 1)
    if not file_exists(ma_out):
        seq_l, sample_l = prepare._read_fastq_files(fn, args)
        # Only seqs.ma goes through file_transaction; seqs.fastq is written
        # directly to its final path.
        with file_transaction(ma_out) as ma_tx:
            with open(ma_tx, 'w') as ma_handle, open(seq_out, 'w') as seq_handle:
                logger.info("Prepare seqs.fastq with -minl 17 -maxl 40 -minc 2 --min_shared 0.1")
                prepare._create_matrix_uniq_seq(sample_l, seq_l, ma_handle,
                                                seq_handle, min_shared)

    for sample in data:
        sample[0]["seqcluster_prepare_ma"] = ma_out
        sample[0]["seqcluster_prepare_fastq"] = seq_out
    return data
Example #3
0
def run_prepare(*data):
    """
    Run seqcluster prepare to merge all samples in one file.

    Every positional argument is a sequence whose first element is a sample
    dictionary providing ``["rgnames"]["sample"]`` (sample name) and
    ``["collapse"]`` (path handed to seqcluster together with the name).

    The merged outputs ``seqs.ma`` and ``seqs.fastq`` are written under
    ``<work_dir>/seqcluster/prepare`` and are only generated when
    ``seqs.ma`` does not exist yet.

    Returns ``data`` unchanged.
    """
    out_dir = os.path.join(dd.get_work_dir(data[0][0]), "seqcluster",
                           "prepare")
    out_dir = os.path.abspath(safe_makedir(out_dir))
    # One "<collapsed file>\t<sample name>" entry per sample.
    fn = []
    for sample in data:
        name = sample[0]["rgnames"]['sample']
        fn.append("%s\t%s" % (sample[0]['collapse'], name))
    prepare_args = namedtuple('args', 'debug print_debug minc minl maxl out')
    args = prepare_args(debug=False, print_debug=False, minc=2, minl=17,
                        maxl=40, out=out_dir)
    ma_out = op.join(out_dir, "seqs.ma")
    seq_out = op.join(out_dir, "seqs.fastq")
    # A tenth of the number of samples, but never less than 1.
    min_shared = max(int(len(fn) / 10.0), 1)
    if not file_exists(ma_out):
        seq_l, sample_l = prepare._read_fastq_files(fn, args)
        # Only seqs.ma goes through file_transaction; seqs.fastq is written
        # directly to its final path.
        with file_transaction(ma_out) as ma_tx:
            with open(ma_tx, 'w') as ma_handle, open(seq_out, 'w') as seq_handle:
                prepare._create_matrix_uniq_seq(sample_l, seq_l, ma_handle,
                                                seq_handle, min_shared)

    return data
Example #4
0
def run_prepare(data):
    """
    Run seqcluster prepare to merge all samples in one file.

    ``data`` is a sequence of sample dictionaries, each providing
    ``["rgnames"]["sample"]`` (sample name) and ``["collapse"]`` (path handed
    to seqcluster together with the name).

    The merged outputs ``seqs.ma`` and ``seqs.fastq`` are written under
    ``<work_dir>/seqcluster/prepare`` and are only generated when
    ``seqs.ma`` does not exist yet.

    Returns ``[data]``, i.e. the input wrapped in a one-element list.
    """
    out_dir = os.path.join(dd.get_work_dir(data[0]), "seqcluster", "prepare")
    out_dir = os.path.abspath(safe_makedir(out_dir))
    # One "<collapsed file>\t<sample name>" entry per sample.
    fn = []
    for sample in data:
        name = sample["rgnames"]['sample']
        fn.append("%s\t%s" % (sample['collapse'], name))
    prepare_args = namedtuple('args', 'debug print_debug minc minl maxl out')
    args = prepare_args(debug=False, print_debug=False, minc=1, minl=17,
                        maxl=40, out=out_dir)
    ma_out = op.join(out_dir, "seqs.ma")
    seq_out = op.join(out_dir, "seqs.fastq")
    # A tenth of the number of samples, but never less than 1.
    min_shared = max(int(len(fn) / 10.0), 1)
    if not file_exists(ma_out):
        # Read the inputs only when the outputs actually have to be built;
        # the parsed sequences are not used for anything else.
        seq_l, sample_l = prepare._read_fastq_files(fn, args)
        # Only seqs.ma goes through file_transaction; seqs.fastq is written
        # directly to its final path.
        with file_transaction(ma_out) as ma_tx:
            with open(ma_tx, 'w') as ma_handle, open(seq_out, 'w') as seq_handle:
                prepare._create_matrix_uniq_seq(sample_l, seq_l, ma_handle,
                                                seq_handle, min_shared)

    return [data]
Example #5
0
 def test_preparedata(self):
     """Build seqs.ma / seqs.fa from the example config and check both exist."""
     out_dir = "test/test_out_prepare"
     # Start from an empty output directory.
     if os.path.exists(out_dir):
         shutil.rmtree(out_dir)
     os.mkdir(out_dir)
     arg = namedtuple('args', 'minl maxl minc out')
     args = arg(15, 40, 1, out_dir)
     ma_path = os.path.join(out_dir, "seqs.ma")
     seq_path = os.path.join(out_dir, "seqs.fa")
     # Context managers guarantee every handle is flushed and closed before
     # the assertions run and before the directory is removed.
     with open("data/examples/collapse/config") as config_handle:
         seq_l, list_s = _read_fastq_files(config_handle, args)
         with open(ma_path, 'w') as ma_out, open(seq_path, 'w') as seq_out:
             _create_matrix_uniq_seq(list_s, seq_l, ma_out, seq_out, 1)
     self.assertTrue(os.path.exists(ma_path))
     self.assertTrue(os.path.exists(seq_path))
     if os.path.exists(out_dir):
         shutil.rmtree(out_dir)
 def test_preparedata(self):
     """Build seqs.ma / seqs.fa inside data/test_collapse and check both exist."""
     mod_dir = os.path.dirname(inspect.getfile(seqcluster)).replace("seqcluster/","")
     # NOTE: this changes the working directory of the whole process and
     # does not restore it afterwards.
     os.chdir(os.path.join(mod_dir, "data/test_collapse"))
     # Remove leftovers from a previous run.
     if os.path.exists("seqs.ma"):
         os.remove("seqs.ma")
     if os.path.exists("seqs.fa"):
         os.remove("seqs.fa")
     arg = namedtuple('args', 'minl maxl minc')
     args = arg(15, 40, 1)
     # Context managers guarantee every handle is flushed and closed before
     # the assertions run.
     with open("config") as config_handle:
         seq_l, list_s = _read_fastq_files(config_handle, args)
         with open("seqs.ma", 'w') as ma_out, open("seqs.fa", 'w') as seq_out:
             _create_matrix_uniq_seq(list_s, seq_l, ma_out, seq_out, 1)
     self.assertTrue(os.path.exists("seqs.ma"))
     self.assertTrue(os.path.exists("seqs.fa"))
Example #7
0
 def test_preparedata(self):
     """Build seqs.ma / seqs.fa from the example config and check both exist."""
     out_dir = "test/test_out_prepare"
     # Start from an empty output directory.
     if os.path.exists(out_dir):
         shutil.rmtree(out_dir)
     os.mkdir(out_dir)
     arg = namedtuple('args', 'minl maxl minc out')
     args = arg(15, 40, 1, out_dir)
     ma_path = os.path.join(out_dir, "seqs.ma")
     seq_path = os.path.join(out_dir, "seqs.fa")
     # Context managers guarantee every handle is flushed and closed before
     # the assertions run and before the directory is removed.
     with open("data/examples/collapse/config") as config_handle:
         seq_l, list_s = _read_fastq_files(config_handle, args)
         with open(ma_path, 'w') as ma_out, open(seq_path, 'w') as seq_out:
             _create_matrix_uniq_seq(list_s, seq_l, ma_out, seq_out, 1)
     self.assertTrue(os.path.exists(ma_path))
     self.assertTrue(os.path.exists(seq_path))
     if os.path.exists(out_dir):
         shutil.rmtree(out_dir)
 def test_preparedata(self):
     """Build seqs.ma / seqs.fa under data/examples/collapse and check both exist."""
     mod_dir = os.path.dirname(inspect.getfile(seqcluster)).replace("seqcluster/","")
     # NOTE: this changes the working directory of the whole process and
     # does not restore it afterwards.
     os.chdir(os.path.join(mod_dir, "data/examples/collapse"))
     out_dir = "test_out_prepare"
     # Start from an empty output directory.
     if os.path.exists(out_dir):
         shutil.rmtree(out_dir)
     os.mkdir(out_dir)
     arg = namedtuple('args', 'minl maxl minc out')
     args = arg(15, 40, 1, out_dir)
     # Context managers guarantee every handle is flushed and closed before
     # the assertions run.
     with open("config") as config_handle:
         seq_l, list_s = _read_fastq_files(config_handle, args)
         with open(os.path.join(out_dir, "seqs.ma"), 'w') as ma_out, \
                 open(os.path.join(out_dir, "seqs.fa"), 'w') as seq_out:
             _create_matrix_uniq_seq(list_s, seq_l, ma_out, seq_out, 1)
     # The existence checks below are relative to the output directory.
     os.chdir(out_dir)
     self.assertTrue(os.path.exists("seqs.ma"))
     self.assertTrue(os.path.exists("seqs.fa"))
Example #9
0
 def test_preparedata(self):
     """Build seqs.ma / seqs.fa under data/examples/collapse and check both exist."""
     mod_dir = os.path.dirname(inspect.getfile(seqcluster)).replace(
         "seqcluster/", "")
     # NOTE: this changes the working directory of the whole process and
     # does not restore it afterwards.
     os.chdir(os.path.join(mod_dir, "data/examples/collapse"))
     out_dir = "test_out_prepare"
     # Start from an empty output directory.
     if os.path.exists(out_dir):
         shutil.rmtree(out_dir)
     os.mkdir(out_dir)
     arg = namedtuple('args', 'minl maxl minc out')
     args = arg(15, 40, 1, out_dir)
     # Context managers guarantee every handle is flushed and closed before
     # the assertions run.
     with open("config") as config_handle:
         seq_l, list_s = _read_fastq_files(config_handle, args)
         with open(os.path.join(out_dir, "seqs.ma"), 'w') as ma_out, \
                 open(os.path.join(out_dir, "seqs.fa"), 'w') as seq_out:
             _create_matrix_uniq_seq(list_s, seq_l, ma_out, seq_out, 1)
     # The existence checks below are relative to the output directory.
     os.chdir(out_dir)
     self.assertTrue(os.path.exists("seqs.ma"))
     self.assertTrue(os.path.exists("seqs.fa"))
Example #10
0
# Comma-separated input files, taken from the second command-line argument.
files = sys.argv[2]


out_dir = "./"

# "<file>\t<id>" entries handed to seqcluster's prepare step.
fn = []

args = namedtuple('args', 'debug print_debug minc minl maxl out')
args = args(False, False, 2, 17, 40, out_dir)

ma_out = op.join(out_dir, "seqs.ma")
seq_out = op.join(out_dir, "seqs.fastq")


# NOTE(review): `ids` is not assigned in the visible part of this script;
# presumably it is a comma-separated string set earlier (e.g. from
# sys.argv[1]) -- confirm.
arr_ids = ids.split(',')
arr_files = files.split(',')

for id_, file_ in zip(arr_ids, arr_files):
    fn.append("%s\t%s" % (file_, id_))

# Must be computed after `fn` is filled: evaluated earlier, len(fn) is
# always 0 and the threshold silently collapses to 1.
min_shared = max(int(len(fn) / 10.0), 1)


if not file_exists(ma_out):
    seq_l, sample_l = prepare._read_fastq_files(fn, args)
    # Only seqs.ma goes through file_transaction; seqs.fastq is written
    # directly to its final path.
    with file_transaction(ma_out) as ma_tx:
        with open(ma_tx, 'w') as ma_handle, open(seq_out, 'w') as seq_handle:
            prepare._create_matrix_uniq_seq(sample_l, seq_l, ma_handle,
                                            seq_handle, min_shared)