Example #1
0
 def test_database(self):
     """Build the example SQLite database from the bundled JSON file.

     Skips silently when the ``sqlite3`` command-line tool is unavailable.
     """
     if find_cmd("sqlite3"):
         # Run inside a throwaway working directory; the path itself is
         # never used, so no name is bound for it.
         with make_workdir():
             # Minimal stand-in for the CLI argparse namespace.
             arg = namedtuple('args', 'debug print_debug json')
             args = arg(True, True, "../../data/examples/seqcluster.json")
             initialize_logger(".", args.debug, args.print_debug)
             logger = mylog.getLogger(__name__)
             logger.info(args)
             logger.info("Reading data")
             data = load_data(args.json)
             logger.info("Create database")
             make_database(data)
Example #2
0
 def test_database(self):
     """Create the example database from seqcluster.json (requires sqlite3).

     No-op when the ``sqlite3`` binary cannot be found on PATH.
     """
     if find_cmd("sqlite3"):
         # The temporary directory only needs to exist for the duration of
         # the block; the unused `workdir` binding was dropped.
         with make_workdir():
             # Fake the argparse namespace the production code expects.
             arg = namedtuple('args', 'debug print_debug json')
             args = arg(True, True, "../../data/examples/seqcluster.json")
             initialize_logger(".", args.debug, args.print_debug)
             logger = mylog.getLogger(__name__)
             logger.info(args)
             logger.info("Reading data")
             data = load_data(args.json)
             logger.info("Create database")
             make_database(data)
Example #3
0
 def test_databse(self):
     """Build the example database from data/examples/seqcluster.json.

     NOTE(review): the method name keeps the historical 'databse' typo so
     that any selection of this test by name keeps working.
     """
     if find_cmd("sqlite3"):
         # Locate the checkout root from the installed package location,
         # then move into the bundled examples directory.
         # NOTE: os.chdir mutates global process state for later tests.
         mod_dir = os.path.dirname(inspect.getfile(seqcluster)).replace("seqcluster/", "")
         os.chdir(os.path.join(mod_dir, "data/examples"))
         # Minimal stand-in for the CLI argparse namespace.
         arg = namedtuple('args', 'debug print_debug json')
         args = arg(True, True, "seqcluster.json")
         initialize_logger(".", args.debug, args.print_debug)
         logger = mylog.getLogger(__name__)
         logger.info(args)
         logger.info("Reading data")
         data = load_data(args.json)
         logger.info("Create database")  # fixed 'databse' typo in log message
         make_database(data)
Example #4
0
def report(args):
    """
    Create report in html format.

    args -- argparse-style namespace; reads ``args.json`` (input data) and
    ``args.out`` (output directory). Writes profiles under
    ``<args.out>/profiles`` and a ``seqcluster.db`` SQLite database.
    """
    # Log messages fixed: 'sequeces' typo corrected and capitalization
    # made consistent with the rest of the module.
    logger.info("Reading sequences")
    data = load_data(args.json)

    logger.info("Create profile")
    data = make_profile(data, os.path.join(args.out, "profiles"), args)
    logger.info("Create database")
    make_database(data, "seqcluster.db", args.out)

    logger.info("Done. Download https://github.com/lpantano/seqclusterViz/archive/master.zip to browse the output.")
Example #5
0
def report(args):
    """Generate the HTML report inputs: load the clustering JSON, build the
    expression profiles and write the browsable SQLite database."""
    logger.info("Reading sequences")
    data = load_data(args.json)

    logger.info("Create profile")
    profiles_dir = os.path.join(args.out, "profiles")
    data = make_profile(data, profiles_dir, args)

    logger.info("Create database")
    make_database(data, "seqcluster.db", args.out)

    logger.info("Done. Download https://github.com/lpantano/seqclusterViz/archive/master.zip to browse the output.")
Example #6
0
 def test_databse(self):
     """Create the example database inside data/examples (requires sqlite3).

     NOTE(review): the 'databse' typo in the method name is preserved so
     external references to this test by name are not broken.
     """
     if find_cmd("sqlite3"):
         # Derive the repository root from the installed package path and
         # enter the examples directory. os.chdir affects the whole process.
         mod_dir = os.path.dirname(inspect.getfile(seqcluster)).replace(
             "seqcluster/", "")
         os.chdir(os.path.join(mod_dir, "data/examples"))
         # Fake argparse namespace with the fields the code reads.
         arg = namedtuple('args', 'debug print_debug json')
         args = arg(True, True, "seqcluster.json")
         initialize_logger(".", args.debug, args.print_debug)
         logger = mylog.getLogger(__name__)
         logger.info(args)
         logger.info("Reading data")
         data = load_data(args.json)
         logger.info("Create database")  # fixed 'databse' typo in log message
         make_database(data)
Example #7
0
def _log_step_counts(step, y, l, read_stats_file):
    """Log aggregate counts and append one (sample, counts, step) row per sample.

    step -- label written into the 'step' column of read_stats.tsv.
    y -- mapping sample -> total counts for this step.
    l -- number of sequences remaining after this step.
    read_stats_file -- TSV path opened in append mode (no header).
    """
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
    dt['step'] = step
    dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')


def cluster(args):
    """
    Creating clusters.

    Pipeline: clean the alignment, parse the count matrix, build clusters,
    reduce multi-mapping meta-clusters, optionally render precursor
    alignments, call peaks, annotate, and emit the JSON/count matrix
    (plus an optional SQLite database). Per-step read statistics are
    appended to read_stats.tsv under args.dir_out.
    """
    args = _check_args(args)
    read_stats_file = op.join(args.dir_out, "read_stats.tsv")
    # Start from a fresh stats file; every step below appends its own rows.
    if file_exists(read_stats_file):
        os.remove(read_stats_file)

    bam_file, seq_obj = _clean_alignment(args)

    logger.info("Parsing matrix file")
    seqL, y, l = parse_ma_file(seq_obj, args.ffile)
    _log_step_counts('aligned', y, l, read_stats_file)

    # Guard against nearly-empty inputs before doing any expensive work.
    if len(seqL) < 10:
        logger.error(
            "It seems you have low coverage. Please check your fastq files have enough sequences."
        )
        raise ValueError("So few sequences.")

    logger.info("Cleaning bam file")
    y, l = _total_counts(list(seqL.keys()), seqL)
    _log_step_counts('cleaned', y, l, read_stats_file)

    clusL = _create_clusters(seqL, bam_file, args)

    y, l = _total_counts(list(clusL.seq.keys()), clusL.seq, aligned=True)
    _log_step_counts('clusters', y, l, read_stats_file)

    logger.info("Solving multi-mapping events in the network of clusters")
    clusLred = _cleaning(clusL, args.dir_out)
    # NOTE(review): other call sites pass a list of keys; here the mapping
    # itself is passed -- confirm _total_counts accepts both forms.
    y, l = _total_counts(clusLred.clus, seqL)
    _log_step_counts('meta-cluster', y, l, read_stats_file)
    logger.info("Clusters up to %s" % (len(clusLred.clus)))

    if args.show:
        logger.info("Creating sequences alignment to precursor")
        clusLred = show_seq(clusLred, args.index)

    clusLred = peak_calling(clusLred)

    clusLred = _annotate(args, clusLred)

    logger.info("Creating json and count matrix")
    json_file = _create_json(clusLred, args)

    logger.info("Output file in: %s" % args.dir_out)

    if args.db:
        name = args.db + ".db"
        logger.info("Create database: database/" + name)
        data = load_data(json_file)
        out_dir = op.join(args.dir_out, "database")
        make_database(data, name, out_dir)
    logger.info("Finished")
Example #8
0
def cluster(args):
    """
    Creating clusters.

    Pipeline: clean the alignment, parse the count matrix, build clusters,
    reduce multi-mapping meta-clusters, optionally render precursor
    alignments, call peaks, annotate, and emit the JSON/count matrix
    (plus an optional SQLite database when args.db is set). After each
    step, per-sample read totals are appended to read_stats.tsv under
    args.dir_out.
    """

    args = _check_args(args)
    read_stats_file = op.join(args.dir_out, "read_stats.tsv")
    # Start from a fresh stats file; every step below appends its own rows.
    if file_exists(read_stats_file):
        os.remove(read_stats_file)

    bam_file, seq_obj = _clean_alignment(args)

    logger.info("Parsing matrix file")
    seqL, y, l = parse_ma_file(seq_obj, args.ffile)
    # y, l = _total_counts(seqL.keys(), seqL)
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    # Record post-alignment totals: one (sample, counts) row per sample.
    dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
    dt['step'] = 'aligned'
    dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')

    # Guard against nearly-empty inputs before doing any expensive work.
    if len(seqL.keys()) < 10:
        logger.error("It seems you have low coverage. Please check your fastq files have enough sequences.")
        raise ValueError("So few sequences.")

    logger.info("Cleaning bam file")
    y, l = _total_counts(list(seqL.keys()), seqL)
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
    dt['step'] = 'cleaned'
    dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')

    clusL = _create_clusters(seqL, bam_file, args)

    y, l = _total_counts(list(clusL.seq.keys()), clusL.seq, aligned=True)
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
    dt['step'] = 'clusters'
    dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')

    logger.info("Solving multi-mapping events in the network of clusters")
    clusLred = _cleaning(clusL, args.dir_out)
    # NOTE(review): other call sites pass a list of keys; here the mapping
    # itself is passed -- confirm _total_counts accepts both forms.
    y, l = _total_counts(clusLred.clus, seqL)
    logger.info("counts after: %s" % sum(y.values()))
    logger.info("# sequences after: %s" % l)
    dt = pd.DataFrame({'sample': y.keys(), 'counts': y.values()})
    dt['step'] = 'meta-cluster'
    dt.to_csv(read_stats_file, sep="\t", index=False, header=False, mode='a')
    logger.info("Clusters up to %s" % (len(clusLred.clus.keys())))

    if args.show:
        # Optional: render per-cluster sequence alignments to the precursor.
        logger.info("Creating sequences alignment to precursor")
        clusLred = show_seq(clusLred, args.index)

    clusLred = peak_calling(clusLred)

    clusLred = _annotate(args, clusLred)

    logger.info("Creating json and count matrix")
    json_file = _create_json(clusLred, args)

    logger.info("Output file in: %s" % args.dir_out)

    if args.db:
        # Optional browsable output: load the freshly written JSON back and
        # materialize it as <args.db>.db under <dir_out>/database.
        name = args.db + ".db"
        logger.info("Create database: database/" + name)
        data = load_data(json_file)
        out_dir = op.join(args.dir_out, "database")
        make_database(data, name, out_dir)
    logger.info("Finished")