Example #1
0
# Output parameters
parser.add_argument('-d', dest='database_filepath', required=True,
                    help='Path and filename of database to write reads to.')

parser.add_argument('-f', dest='force_overwrite', action='store_true', default=False,
                    help='Overwrite previous tables with the same name.')

args = parser.parse_args()

# Start the wall-clock timer once the command line has been parsed.
toc = time.time()

# A lone '-' given as the input is swapped for the standard input stream.
if args.input == '-':
    args.input = sys.stdin

# Connect/make database
db = Reads_db(db_file=args.database_filepath, recbyname=True)

# (Re)create each table when it is missing or when -f was given.
# force_overwrite is a store_true flag, so it is already a plain bool.
if 'seqs' not in db.tables or args.force_overwrite:
    db.create_seqs_table(overwrite=args.force_overwrite, read_header=args.header_format)

if 'samples' not in db.tables or args.force_overwrite:
    db.create_samples_table(overwrite=args.force_overwrite)

db.load_seqs(data_files=args.input, barcode_files=args.barcodes, buffer_max=args.buffer_max,
             read_header=args.header_format)

total_t = time.time() - toc
# sys.stderr.write emits the same text on Python 2 and Python 3, whereas the
# `print >> sys.stderr, ...` statement raises TypeError under Python 3.
sys.stderr.write('Loaded processed reads file in {0}\n'.format(
    time.strftime('%H:%M:%S', time.gmtime(total_t))))

if __name__ == '__main__':
Example #2
0
    parser.add_argument(
        "--max",
        dest="fmax",
        default=0,
        type=int,
        help="Maximum size of clusters to load. Default = 0 no max limit",
    )

    parser.add_argument(
        "--skipsort", dest="skipsort", action="store_true",
        help="Skip sorting the cluster file. Useful if file has already been previously sorted.",
    )

    args = parser.parse_args()

    # The database file must already exist; stop here when it does not.
    if not os.path.exists(args.output):
        raise Exception("Database file not found.")
    db = Reads_db(args.output)

    # Load cluster file, forwarding the parsed command-line options.
    load_options = dict(
        cluster_file_handle=args.input,
        table_prefix=args.tableprefix,
        overwrite=args.overwrite,
        fmin=args.fmin,
        fmax=args.fmax,
        skipsort=args.skipsort,
        buffer_max=args.buffer,
    )
    db.load_cluster_file(**load_options)

    # Elapsed wall-clock time since `toc` (set earlier, outside this excerpt).
    total_t = time.time() - toc
Example #3
0
                        help='''SQL expression to filter the query which selects the sequences in the database. Default is to export all sequences in database.
                         Basic query is:
                             SELECT seqid, seq, phred FROM seqs INNER JOIN samples ON seqs.sampleId=samples.sampleId 
                                    WHERE <filter_expression> ''')
    # Numeric options declare type=int so that values supplied on the command
    # line arrive as integers, like their defaults, instead of as strings.
    parser.add_argument('-s', dest='startidx',
                        default=0, type=int,
                        help='Starting base index of DNA sequences that are written to file, used to miss out cutsite if desired.')

    parser.add_argument('-f', dest='format',
                        default='fasta',
                        help='Format of file written to output.')

    parser.add_argument('-b', dest='rowbuffer',
                        default=100000, type=int,
                        help='Read write buffer. Number of records to read before writing to file.')

    # NOTE(review): this option has no action='store_true', so it consumes a
    # value and hands it on as a string; any non-empty string (even 'False')
    # is truthy. Left unchanged so existing `-F <value>` invocations keep
    # working -- confirm whether a plain flag was intended.
    parser.add_argument('-F', dest='overwrite',
                        default=False,
                        help='Overwrite any file with same name as output.')

    args = parser.parse_args()

    # Write records to output
    db = Reads_db(args.input, recbyname=True)

    fastafile_handle = db.write_reads(args.output, output_format=args.format,
                                      filter_expression=args.filter_expression,
                                      startidx=args.startidx,
                                      rowbuffer=args.rowbuffer,
                                      overwrite=args.overwrite)
Example #4
0
                         Basic query is:
                             SELECT * FROM {clusters}  
                                    WHERE <filter_expression> ''')
    # type=int makes a user-supplied start index an integer, like the default,
    # instead of the string argparse would otherwise hand back.
    parser.add_argument('-s', dest='startidx',
                        default=0, type=int,
                        help='Starting base index of DNA sequences that are written to file, used to miss out cutsite if desired.')

    parser.add_argument('-f', dest='format',
                        default='fasta',
                        help='Format of file written to output.')

    # NOTE(review): looks like leftover debug output echoing the raw command
    # line to stdout -- confirm whether it should stay. Written in call form
    # so it prints the same on Python 2 and also parses on Python 3.
    print(sys.argv)
    args = parser.parse_args()

    # Write records to output
    db = Reads_db(args.input, recbyname=True)

    # Not used again within the visible lines; kept in case later code or a
    # side effect of the call relies on it.
    clusters_list = db.get_cluster_by_size()

    fastafile_handle = db.write_reads(args.output, format=args.format,
                                      filter_expression=args.filter_expression,
                                      startidx=args.startidx)

    # NOTE(review): this second call passes different positional arguments
    # than the one above and reads options (query_expression, pattern,
    # typeflag) that are not declared in the visible lines -- verify against
    # the parser setup and the write_reads signature.
    if args.query_expression:
        fastafile_handle = db.write_reads(args.pattern, args.output,
                                          use_type_column=args.typeflag, format='fasta')