Example #1
0
def _collect_source_files(source_dir, file_type, recursive):
    """Return the paths of the files under source_dir that match file_type.

    Args:
        source_dir: directory to scan.
        file_type: filename suffix to keep, or None to keep every file.
        recursive: when true, descend into sub-directories; otherwise only
            the files directly inside source_dir are considered.

    Returns:
        A list of paths, each built by joining the walked directory with
        the filename. Empty when source_dir cannot be walked.
    """
    src_files = []
    for dirpath, _dirnames, filenames in os.walk(source_dir):
        for filename in filenames:
            if file_type is None or filename.endswith(file_type):
                src_files.append(os.path.join(dirpath, filename))
        if not recursive:
            # os.walk is top-down by default, so the first triple describes
            # source_dir itself; stop before descending any further.
            break
    return src_files


def main():
    """Copy or symlink files from args.source_dir into args.out_dir.

    The destination filename of each source file is chosen as follows:

    * if args.re_list is given, every "find:replace" entry is applied in
      order to the source filename with re.sub;
    * otherwise, if args.mapping_file is given, the name is looked up in the
      mapping (each line holds two whitespace-separated tokens: source
      filename and destination filename); files that are not listed are
      skipped with a warning;
    * otherwise the source filename is kept unchanged.

    Files are copied with "cp" when args.hard_copy is set and symlinked with
    "ln -s" otherwise. The process exits with status 1 if a source file
    disappears before it is processed.
    """
    # Function-scope import: only this function needs subprocess.
    import subprocess

    fs = FS()
    args = get_arguments()

    src_files = _collect_source_files(
        args.source_dir, args.file_type, args.recursive)

    # Optional explicit "source filename -> destination filename" mapping.
    src2dest = {}
    if args.mapping_file is not None:
        content = fs.read2list(args.mapping_file)
        fs.close()
        for line in content:
            try:
                (src_filename, dest_filename) = line.split()
            except ValueError as ex:
                # Raised when a line does not hold exactly two tokens.
                logging.warning(
                    "mapping src file to dest file [empty line?]: " + str(ex))
            else:
                src2dest[src_filename] = dest_filename

    src_file2dest_file = {}
    for src_file in src_files:
        src_filename = os.path.basename(src_file)
        if args.re_list is not None:
            dest_filename = src_filename
            for regular_expression in args.re_list:
                (find_str, replace_str) = regular_expression.split(":")
                print("%s be replaced with: %s" % (find_str, replace_str))
                dest_filename = re.sub(find_str, replace_str, dest_filename)
        elif args.mapping_file is not None:
            if src_filename not in src2dest:
                logging.warning(
                    "\"%s\" is NOT listed in the mapping file!!!\n" %
                    src_filename)
                # Skip the file: there is no destination for it. Recording
                # it anyway would reuse the previous file's destination.
                continue
            dest_filename = src2dest[src_filename]
        else:
            dest_filename = src_filename
        src_file2dest_file[src_file] = os.path.join(args.out_dir,
                                                    dest_filename)

    for (src_file, dest_file) in sorted(src_file2dest_file.items()):
        if not os.path.exists(src_file):
            logging.error("\"%s\" does not exist!!!\n" % src_file)
            sys.exit(1)
        if args.hard_copy:
            cmd = ["cp", src_file, dest_file]
        else:
            cmd = ["ln", "-s", src_file, dest_file]
        cmd_line = " ".join(cmd)
        logging.info(cmd_line)
        # An argument list (no shell) keeps paths containing spaces, quotes
        # or other shell metacharacters intact.
        status = subprocess.call(cmd)
        if status != 0:
            logging.warning("command failed with exit status %s: %s",
                            status, cmd_line)
Example #2
0
def main():
    """Write and submit a shell script that merges per-lane .gz files.

    Every ".gz" file found under args.src_dir is collected. For each file
    whose path contains "_L001_", the sibling paths obtained by replacing
    that marker with "_L002_", "_L003_" and "_L004_" are looked up; when all
    four exist, the command returned by z.cat_gz is appended to
    "<src_dir>/next_seq_merge/next_seq_merge_commands.sh", with the merged
    output named after the lane-1 file minus its lane marker. When
    args.fastq is set, a "gunzip" line is also written for every merged
    output. The script is finally handed to "qsub_cmd.py" in the background
    via os.system.

    When args.check_gz is set, the collected files are first passed to
    z.test_gz.
    """
    fs = FS()
    z = ZIP()
    args = get_arguments()
    src_dir = args.src_dir

    next_seq_merge_dir = os.path.join(src_dir, "next_seq_merge")
    if not os.path.exists(next_seq_merge_dir):
        os.makedirs(next_seq_merge_dir)
        # Report only once the directory really exists.
        print("%s is created!\n" % next_seq_merge_dir)

    next_seq_merge_command_sh = os.path.join(next_seq_merge_dir,
                                             "next_seq_merge_commands.sh")

    gz_file_list = sorted(
        os.path.join(root, filename)
        for root, _dirs, files in os.walk(src_dir, topdown=False)
        for filename in files
        if filename.endswith(".gz"))

    if args.check_gz:
        z.test_gz(gz_file_list)

    # Set built once so each lane lookup below is O(1) instead of a list scan.
    known_gz_files = set(gz_file_list)

    out_gz_list = []
    for gz_file in sorted(gz_file_list):
        if "_L001_" not in gz_file:
            continue
        out_gz = os.path.join(
            next_seq_merge_dir,
            os.path.basename(gz_file.replace("_L001_", "_")))

        # Lane 1 maps onto gz_file itself; lanes 2-4 are its siblings.
        to_cat_list = [
            gz_file.replace("_L001_", "_L00%d_" % lane)
            for lane in (1, 2, 3, 4)
        ]
        missing = [path for path in to_cat_list
                   if path not in known_gz_files]
        for path in missing:
            print("CAN'T find %s" % path)
        if missing:
            # Incomplete lane set: do not emit a merge command.
            continue

        fs.write(next_seq_merge_command_sh,
                 z.cat_gz(out_gz, to_cat_list) + "\n")
        out_gz_list.append(out_gz)

    fs.write(next_seq_merge_command_sh, "\n\n")

    if args.fastq:
        for out_gz in out_gz_list:
            fs.write(next_seq_merge_command_sh, "gunzip " + out_gz + "\n")

    fs.write(next_seq_merge_command_sh, "\n\n")

    qsub_cmd = ("nohup qsub_cmd.py " + next_seq_merge_command_sh +
                " >/dev/null 2>&1 &")
    # Record the submission command inside the script for later reference.
    fs.write(next_seq_merge_command_sh, "\n#run this: " + qsub_cmd + "\n")
    fs.close()

    os.system(qsub_cmd)