Ejemplo n.º 1
0
def parse(args, kmap=None):
    """Build the trident work units (DAG nodes) for one miRNA/DNA pair.

    ``args[0]`` is taken as the miRNA file and ``args[1]`` as the DNA file.
    When the DNA path is an existing file it is handed to
    ``chromosome_chopper.chopper`` to be split into segment files; one
    ``TridentInstance`` is then created per segment, or a single instance
    for the original DNA file when the chopper reports exactly one file.

    Args:
        args: Sequence of command-line strings.  At least two entries are
            required.  Every ``-sc`` entry that is followed by an integer
            below 140 divides the chunk size by three.  A ``-out`` entry
            after the two filenames suppresses the automatic ``-out``
            argument.
        kmap: Optional mapping of option name to a list of values.  Keys
            read here: ``assembly`` (joined with spaces), ``species``
            (stored as given), and ``memory_limit``, ``app``,
            ``project_name``, ``chunk_size``, ``host`` (first element
            used; ``memory_limit`` and ``chunk_size`` are converted with
            ``int``).

    Returns:
        ``None`` when fewer than two arguments are supplied, otherwise a
        list holding the ``get_dag_node()`` result of every instance.

    Raises:
        ValueError: If ``memory_limit``, ``chunk_size`` or the value that
            follows ``-sc`` cannot be converted to an integer.
    """
    # Validate first so that bad input is reported without needing the
    # project-level trident package to be importable.
    if len(args) < 2:
        print("trident processes require at least two filenames")
        return None

    import os
    import os.path as OP
    from trident import chromosome_chopper as chopper

    # A None default avoids sharing one dict object between calls.
    if kmap is None:
        kmap = {}

    header_map = {}
    rsc_memory_limit = default_memory_limit
    chunk_size = default_chunk_size
    if 'assembly' in kmap:
        header_map['assembly'] = " ".join(kmap['assembly'])
        print("Labeling Assembly: {0}".format(header_map['assembly']))
    if 'species' in kmap:
        header_map['species'] = kmap['species']
        # NOTE(review): the test fires at three or more entries while the
        # message says "more than three" -- confirm which one is intended.
        if len(header_map['species']) > 2:
            print("WARNING: Species contains more than three words.")
        print("Labeling Species: {0}".format(header_map['species']))
    if 'memory_limit' in kmap:
        rsc_memory_limit = int(kmap['memory_limit'][0])
        print("Using memory limit of {0} bytes".format(rsc_memory_limit))
    app_profile = kmap['app'][0] if 'app' in kmap else None
    project_name = kmap['project_name'][0] if 'project_name' in kmap else None
    if 'chunk_size' in kmap:
        chunk_size = int(kmap['chunk_size'][0])
    host = kmap['host'][0] if 'host' in kmap else None

    mirna, dna = args[0], args[1]
    flags = args[2:]

    print("Running trident with miRNA %s and DNA %s" % (mirna, dna))
    if len(flags) > 0:
        print ("with flags %s" % " ".join(flags))

    if not OP.isdir(temp_directory):
        os.mkdir(temp_directory)

    dna_base = OP.basename(dna)
    num_files = 1
    if OP.isfile(dna):
        # Look at every (flag, following value) pair; a trailing "-sc"
        # without a value is ignored because it has no successor.
        for flag, value in zip(args, args[1:]):
            if flag == "-sc" and int(value) < 140:
                # Floor division keeps the chunk size an integer; true
                # division would turn it into a float on Python 3.
                chunk_size //= 3
        num_files = chopper.chopper(dna, segment_fmt % dna_base, chunk_size,
                                    overwrite=False, header_map=header_map)

    # sanity check chopper. No need to chop the sequence into one segment file
    if num_files == 1:
        chopped_file = (segment_fmt + "-1") % dna_base
        if OP.isfile(chopped_file):
            os.unlink(chopped_file)
    else:
        print("Created %d dna files" % num_files)

    # NOTE(review): the argument string contains the two filenames as well
    # as the flags -- confirm that TridentInstance expects that.
    strargs = " ".join(args)
    have_output = "-out" in flags

    def make_instance(target):
        # Create a TridentInstance for `target` with the shared settings.
        proc = TridentInstance(mirna, target, strargs)
        proc.rsc_memory_limit = rsc_memory_limit
        proc.executable_name = "trident.centos6.1"
        if project_name:
            proc.project_name = project_name
        if app_profile:
            proc.application_profile = app_profile
        if host:
            proc.host = host
        return proc

    retval = []
    if num_files == 1:
        # in case the dna did not need to be segmented
        proc = make_instance(dna)
        if not have_output:
            proc.args += " -out %s.out" % dna
        retval.append(proc.get_dag_node())
    else:
        # iterate through segments and set up work units;
        # for the sake of running on a grid, an output file is required.
        for i in range(1, num_files + 1):
            segment_name = (segment_fmt + "-%s") % (dna_base, i)
            proc = make_instance(segment_name)
            if not have_output:
                proc.args += " -out {0}.out".format(segment_name)
            # Tag every ".out" in the argument string with the segment
            # number so the segments do not share an output name.
            proc.args = proc.args.replace(".out", ".out-{0}".format(i))
            retval.append(proc.get_dag_node())

    return retval
Ejemplo n.º 2
0
def parse(args, kmap=None):
    """Build the trident work units (DAG nodes) for one miRNA/DNA pair.

    ``args[0]`` is taken as the miRNA file and ``args[1]`` as the DNA file;
    the remaining entries are joined with spaces and passed to every
    ``TridentInstance`` as its argument string.  When the DNA path is an
    existing file it is handed to ``chromosome_chopper.chopper`` to be
    split into segment files; one instance is then created per segment, or
    a single instance for the original DNA file when the chopper reports
    exactly one file.

    Args:
        args: Sequence of command-line strings.  At least two entries are
            required.  Every ``-sc`` entry that is followed by an integer
            below 140 divides the chunk size by three.  A ``-out`` entry
            after the two filenames suppresses the automatic ``-out``
            argument.
        kmap: Optional mapping of option name to a list of values.  Keys
            read here: ``assembly`` (joined with spaces) and ``species``
            (stored as given).

    Returns:
        ``None`` when fewer than two arguments are supplied, otherwise a
        list holding the ``get_dag_node()`` result of every instance.

    Raises:
        ValueError: If the value that follows ``-sc`` cannot be converted
            to an integer.
    """
    # Validate first so that bad input is reported without needing the
    # project-level trident package to be importable.
    if len(args) < 2:
        print("trident processes require at least two filenames")
        return None

    import os
    import os.path as OP
    from trident import chromosome_chopper as chopper

    # A None default avoids sharing one dict object between calls.
    if kmap is None:
        kmap = {}

    header_map = {}
    if 'assembly' in kmap:
        header_map['assembly'] = " ".join(kmap['assembly'])
        print("Labeling Assembly: {0}".format(header_map['assembly']))
    if 'species' in kmap:
        header_map['species'] = kmap['species']
        # NOTE(review): the test fires at three or more entries while the
        # message says "more than three" -- confirm which one is intended.
        if len(header_map['species']) > 2:
            print("WARNING: Species contains more than three words.")
        print("Labeling Species: {0}".format(header_map['species']))

    mirna, dna = args[0], args[1]
    flags = args[2:]

    print("Running trident with miRNA %s and DNA %s" % (mirna, dna))
    if len(flags) > 0:
        print ("with flags %s" % " ".join(flags))

    if not OP.isdir(".GRID"):
        os.mkdir(".GRID")

    dna_base = OP.basename(dna)
    num_files = 1
    if OP.isfile(dna):
        chunk_size = default_chunk_size
        # Look at every (flag, following value) pair; a trailing "-sc"
        # without a value is ignored because it has no successor.
        for flag, value in zip(args, args[1:]):
            if flag == "-sc" and int(value) < 140:
                # Floor division keeps the chunk size an integer; true
                # division would turn it into a float on Python 3.
                chunk_size //= 3
        num_files = chopper.chopper(dna, segment_fmt % dna_base, chunk_size,
                                    overwrite=False, header_map=header_map)

    # sanity check chopper. No need to chop the sequence into one segment file
    if num_files == 1:
        chopped_file = (segment_fmt + "-1") % dna_base
        if OP.isfile(chopped_file):
            os.unlink(chopped_file)
        targets = [dna]
    else:
        print("Created %d dna files" % num_files)
        targets = [(segment_fmt + "-%s") % (dna_base, i)
                   for i in range(1, num_files + 1)]

    strargs = " ".join(flags)
    have_output = "-out" in flags

    # One work unit per target: the unsplit DNA file, or each segment.
    # For the sake of running on a grid, an output file is required.
    retval = []
    for target in targets:
        proc = TridentInstance(mirna, target, strargs)
        if not have_output:
            proc.args += " -out %s.out" % target
        retval.append(proc.get_dag_node())

    return retval