Esempio n. 1
0
    def process(self):
        """
        Convert each input biom file into a tsv file.

        ``self.input_files`` may hold a single path or a list of paths; a
        non-list value is wrapped into a one-element list. For every input,
        the command ``<biom> convert -i <input> -o <output> -b`` is passed to
        ``self.submit_cmd``. The output is named ``<input base name>.tsv``
        (relative path, no directory component). When ``self.remove_hdr`` is
        true, the first line of the generated file is dropped.

        The generated file names are collected in ``self.output_files``.
        """
        if not isinstance(self.input_files, list):
            self.input_files = [self.input_files]

        self.output_files = []
        for input_biom in self.input_files:
            file_name = os.path.splitext(os.path.basename(input_biom))[0]
            output_tsv = '%s.tsv' % file_name

            # Remove any pre-existing output file before converting
            if os.path.isfile(output_tsv):
                os.remove(output_tsv)
            self.log.info('Converting %s into %s', input_biom, output_tsv)

            # For now only allow tsv conversion using option '-b'
            cmd = '%s convert -i %s -o %s -b' % (which('biom'), input_biom, output_tsv)

            self.submit_cmd(cmd)

            if self.remove_hdr:
                tmp_tsv = '%s.tmp' % output_tsv
                # Context managers close both handles even if the copy fails
                with open(output_tsv, 'r') as ip_fn:
                    with open(tmp_tsv, 'w') as op_fn:
                        # Skip the first line; the default avoids StopIteration
                        # when the converted file is empty
                        next(ip_fn, None)
                        op_fn.writelines(ip_fn)
                os.rename(tmp_tsv, output_tsv)

            self.output_files.append(output_tsv)
Esempio n. 2
0
    def process(self):
        """
        Run ``biom convert`` on every input file to produce tsv output.

        A non-list ``self.input_files`` is first wrapped into a list. Each
        input yields an output file named after the input's base name with
        its extension replaced by ``.tsv``; an existing file with that name is
        deleted beforehand. The conversion command is handed to
        ``self.submit_cmd``. If ``self.remove_hdr`` is set, the first line of
        the output is stripped by rewriting the file through a ``.tmp`` copy.

        ``self.output_files`` is reset and filled with the output names.
        """
        if not isinstance(self.input_files, list):
            self.input_files = [self.input_files]

        self.output_files = []
        for input_biom in self.input_files:
            base_name, _ = os.path.splitext(os.path.basename(input_biom))
            output_tsv = '%s.tsv' % base_name

            if os.path.isfile(output_tsv):
                os.remove(output_tsv)
            self.log.info('Converting %s into %s', input_biom, output_tsv)

            # For now only allow tsv conversion using option '-b'
            cmd = '%s convert -i %s -o %s -b' % (which('biom'), input_biom,
                                                 output_tsv)

            self.submit_cmd(cmd)

            if self.remove_hdr:
                stripped_tsv = '%s.tmp' % output_tsv
                # 'with' guarantees both files are closed, even on error
                with open(output_tsv, 'r') as src:
                    with open(stripped_tsv, 'w') as dst:
                        # Discard the first line; tolerate an empty file
                        next(src, None)
                        for line in src:
                            dst.write(line)
                os.rename(stripped_tsv, output_tsv)

            self.output_files.append(output_tsv)
Esempio n. 3
0
def resume(user, cfg, run_id, pids):
    """
    Resume, as user 'user', the pipeline defined by the config 'cfg'.

    The pid of the current process is stored in pids[run_id] before
    np_submit.py is launched through run_as.
    Returns the second and third items of the run_as result when the exit
    code is 0; raises Exception otherwise.
    """
    pids[run_id] = mp.current_process().pid
    cmd = [which('np_submit.py'), cfg]
    exit_code, errors, output = run_as(cmd=cmd, user=user)
    # Fail fast on a non-zero exit code
    if exit_code != 0:
        raise Exception('Unable to execute cmd %s:\n %s' % (cmd, errors))
    return (errors, output)
Esempio n. 4
0
def submit(config, user, run_id, pids):
    """
    Submits pipeline defined by 'config' as user 'user'.

    The pid of the current process is stored in pids[run_id]. The config is
    dumped as JSON into a temporary file (mode 0644) which is passed to
    np_submit.py with '-i'. The temporary file is removed only after
    successful completion; on failure it is left in place.

    Returns the second and third items of the run_as result when the exit
    code is 0; raises Exception otherwise.
    """
    pids[run_id] = mp.current_process().pid
    (fd, tmp_cfg) = tempfile.mkstemp(prefix='pypers_', suffix='.cfg', text=True)
    # '0o644' is valid on Python 2.6+ and Python 3; the bare '0644' form is a
    # syntax error on Python 3
    os.fchmod(fd, 0o644)
    with os.fdopen(fd, 'w') as fh:
        json.dump(config, fh)
    cmd = [which('np_submit.py'), '-i', tmp_cfg]
    (ec, err, out) = run_as(cmd=cmd, user=user)
    if ec == 0:
        os.unlink(tmp_cfg)
        return (err, out)
    else:
        raise Exception('Unable to execute cmd %s:\n%s\n%s' % (cmd, err, out))
Esempio n. 5
0
    def exec_monitoring(self):
        """
        Launch the demultiplexing pipeline for flow cells not yet processed.

        Every entry of each directory in self.hiseq_dirs is considered a
        flow cell candidate when its name matches ".+_.+_.+_.+", does not
        contain "Temp" and holds an "RTAComplete.txt" file. Candidates
        without a sample sheet are reported on stdout; the others are
        compared with the entries of self.demu_dir, and np_submit.py is run
        (through run_as, as self.user) for each one that is missing there.
        """
        # Flow cell ID -> directory, for candidates that have a sample sheet
        ready_dirs = {}
        # Candidate directories in which no sample sheet was found
        no_sheet_dirs = []
        for hiseq_dir in self.hiseq_dirs:
            for fwcell in os.listdir(hiseq_dir):
                fwcell_path = os.path.join(hiseq_dir, fwcell)

                # Skip anything that is not a completed flow cell directory
                if not re.search(".+_.+_.+_.+", fwcell):
                    continue
                if "Temp" in fwcell:
                    continue
                if not os.path.exists(os.path.join(fwcell_path, "RTAComplete.txt")):
                    continue

                # A sample sheet is any entry naming both "SampleSheet" and ".csv"
                has_sample_sheet = any(
                    "SampleSheet" in entry and ".csv" in entry
                    for entry in os.listdir(fwcell_path)
                )
                if has_sample_sheet:
                    ready_dirs[fwcell] = fwcell_path
                else:
                    no_sheet_dirs.append(fwcell_path)

        # Report all the missing sample sheets detected
        if no_sheet_dirs:
            print("******************************************************")
            for no_sheet_dir in no_sheet_dirs:
                print("Missing sample sheet in %s " % no_sheet_dir)

        # Flow cells seen on the hiseq side but absent from the demux dir
        hiseq_flow_cells = set(ready_dirs)
        demu_flow_cells = set(os.listdir(self.demu_dir))
        for fwcell_id in hiseq_flow_cells - demu_flow_cells:
            submit_cmd = which('np_submit.py')
            input_dir = ready_dirs[fwcell_id]
            output_dir = os.path.join(self.demu_dir, fwcell_id)
            cmd = [
                submit_cmd,
                pipeline_names['demultiplexing'],
                'pipeline.output_dir=%s' % output_dir,
                'pipeline.project_name=Demux',
                'pipeline.description=Demultiplexing',
                'steps.inputs.input_dir=%s' % input_dir,
            ]
            run_as(cmd=cmd, user=self.user)

            print("******************************************************")
            print(" %s Queued demux  with:" % time.ctime())
            print("   Input dir  : %s" % input_dir)
            print("   Output dir : %s" % output_dir)
            print("   Cmd : %s" % ' '.join(cmd))
            print("******************************************************")
Esempio n. 6
0
#!/usr/bin/env python

import glob
import subprocess
import time
import os
import getpass
import sys
from pypers.utils.utils import which

# Path of the np_submit.py launcher as resolved by which()
NP_SUBMIT = which("np_submit.py")

if __name__ == '__main__':
    # Directory containing this script; the default tests are its *.json files
    testdir = os.path.dirname(os.path.realpath(__file__))
    user = getpass.getuser()
    # Every invocation writes below a fresh, timestamped root directory
    output_root = '/scratch/%s/pypers/test_suite/%.0f' % (user, time.time())

    # Tests given on the command line take precedence over the default set
    tests = sys.argv[1:] or glob.glob('%s/*.json' % testdir)

    for test in tests:
        # Output directory is named after the test file up to its first dot
        test_name = os.path.basename(test).partition('.')[0]
        output_dir = os.path.join(output_root, test_name)
        subprocess.call([NP_SUBMIT, test, 'pipeline.output_dir=%s' % output_dir])
Esempio n. 7
0
    def exec_monitoring(self):
        """
        Check which flow cells still need demultiplexing and submit them.

        Scans each directory of self.hiseq_dirs for entries whose name
        matches ".+_.+_.+_.+", does not contain "Temp" and which contain an
        "RTAComplete.txt" file. Entries lacking a sample sheet are printed
        as missing. For every remaining entry whose name is not listed in
        self.demu_dir, np_submit.py is invoked via run_as as self.user and a
        summary of the submission is printed.
        """
        separator = "******************************************************"

        # {"Fw cell ID": "path"} for the flow cells that have a sample sheet
        with_sheet = {}
        without_sheet = []
        for hiseq_dir in self.hiseq_dirs:
            for fwcell in os.listdir(hiseq_dir):
                fwcell_path = os.path.join(hiseq_dir, fwcell)
                rta_marker = os.path.join(fwcell_path, "RTAComplete.txt")

                # Only directories with the "RTAComplete.txt" file are considered
                is_candidate = (re.search(".+_.+_.+_.+", fwcell)
                                and "Temp" not in fwcell
                                and os.path.exists(rta_marker))
                if not is_candidate:
                    continue

                # Look for the sample sheet; the else branch runs only when
                # the loop finishes without finding one
                for filename in os.listdir(fwcell_path):
                    if "SampleSheet" in filename and ".csv" in filename:
                        with_sheet[fwcell] = fwcell_path
                        break
                else:
                    without_sheet.append(fwcell_path)

        # Log all the missing sample sheets detected
        if without_sheet:
            print(separator)
            for missing_ss in without_sheet:
                print("Missing sample sheet in %s " % missing_ss)

        # Compare the hiseq flow cells with the demultiplexed ones
        hiseq_flow_cells = set(with_sheet)
        demu_flow_cells = set(os.listdir(self.demu_dir))
        not_demultiplexed = hiseq_flow_cells.difference(demu_flow_cells)
        for fwcell_id in not_demultiplexed:
            submit_cmd = which('np_submit.py')
            demux_out = os.path.join(self.demu_dir, fwcell_id)
            cmd = [submit_cmd, pipeline_names['demultiplexing']]
            cmd.append('pipeline.output_dir=%s' % demux_out)
            cmd.append('pipeline.project_name=Demux')
            cmd.append('pipeline.description=Demultiplexing')
            cmd.append('steps.inputs.input_dir=%s' % with_sheet[fwcell_id])
            run_as(cmd=cmd, user=self.user)

            print(separator)
            print(" %s Queued demux  with:" % time.ctime())
            print("   Input dir  : %s" % with_sheet[fwcell_id])
            print("   Output dir : %s" % demux_out)
            print("   Cmd : %s" % ' '.join(cmd))
            print(separator)