Example #1
0
def loadFastqc(filename,
               backend="sqlite",
               database="csvdb",
               host="",
               username="",
               password="",
               port=3306):
    '''load FASTQC statistics into database.

    Every file matching `filename` is read section by section through
    ``FastqcSectionIterator``. Each section except "Basic Statistics"
    is uploaded to its own table called ``<prefix>_<section>``, where
    ``<prefix>`` is the name of the directory containing the file and
    spaces in the section name are replaced by underscores. In
    addition, a table ``<prefix>_status`` with the columns ``name``
    and ``status`` is loaded, holding one row per uploaded section.

    Arguments
    ----------
    filename : string
        Filename with FASTQC data. The value is expanded with
        :func:`glob.glob`, so it may contain wildcards.
    backend : string
        Database backend. Only this is required for an sqlite database.
    database : string
        Database name.
    host : string
        Database host name
    username : string
        Database user name
    password : string
        Database password
    port : int
        Database server port.
    '''

    parser = CSV2DB.buildParser()
    # parse an empty command line to obtain the parser defaults
    options, _ = parser.parse_args([])

    options.database_backend = backend
    options.database_host = host
    options.database_name = database
    options.database_username = username
    options.database_password = password
    options.database_port = port
    options.allow_empty = True

    for fn in glob.glob(filename):
        prefix = os.path.basename(os.path.dirname(fn))
        results = []

        # Close the handle explicitly: one file is opened per glob
        # match and none of them was ever released before.
        handle = IOTools.openFile(fn)
        try:
            for name, status, header, data in FastqcSectionIterator(handle):
                # do not collect basic stats, see loadFastQCSummary
                if name == "Basic Statistics":
                    continue

                options.tablename = prefix + "_" + re.sub(" ", "_", name)

                inf = StringIO("\n".join([header] + data) + "\n")
                CSV2DB.run(inf, options)
                results.append((name, status))
        finally:
            handle.close()

        # load status table
        options.tablename = prefix + "_status"

        inf = StringIO("\n".join(["name\tstatus"] +
                                 ["\t".join(x) for x in results]) + "\n")
        CSV2DB.run(inf, options)
Example #2
0
def main(argv=None):
    '''script entry point.

    Builds the CSV2DB option parser, lets ``E.Start`` parse the
    command line and passes the input stream ``options.stdin`` on to
    ``CSV2DB.run``. If ``options.from_zipped`` is set, the stream is
    wrapped in a :class:`gzip.GzipFile` first.

    Arguments
    ---------
    argv : list, optional
        Command line arguments in the layout of ``sys.argv``. If not
        given, ``sys.argv`` is looked up when the function is called.
    '''
    # Resolve the default at call time; a ``sys.argv`` default in the
    # signature would be frozen when the module is imported.
    if argv is None:
        argv = sys.argv

    parser = CSV2DB.buildParser()

    options, _ = E.Start(parser, argv=argv, add_psql_options=True)

    if options.from_zipped:
        import gzip
        infile = gzip.GzipFile(fileobj=options.stdin, mode='r')
    else:
        infile = options.stdin

    CSV2DB.run(infile, options)

    E.Stop()
Example #3
0
def main(argv=None):
    '''script entry point.

    The command line is parsed by ``E.Start`` using the parser
    returned by ``CSV2DB.buildParser``. The stream ``options.stdin``
    is then handed to ``CSV2DB.run``, wrapped in a
    :class:`gzip.GzipFile` when ``options.from_zipped`` is set.

    Arguments
    ---------
    argv : list, optional
        Command line arguments in the layout of ``sys.argv``. When
        omitted, the current value of ``sys.argv`` is used.
    '''
    # A default of ``sys.argv`` in the signature is evaluated only once
    # at definition time, so look it up here instead.
    if argv is None:
        argv = sys.argv

    parser = CSV2DB.buildParser()

    options, _ = E.Start(parser, argv=argv, add_psql_options=True)

    if options.from_zipped:
        import gzip
        infile = gzip.GzipFile(fileobj=options.stdin, mode='r')
    else:
        infile = options.stdin

    CSV2DB.run(infile, options)

    E.Stop()
Example #4
0
def loadFastqc(filename):
    '''load FASTQC stats.

    Each file matching `filename` is split into sections by
    ``FastqcSectionIterator``. Every section apart from
    "Basic Statistics" is passed to ``CSV2DB.run`` with the table name
    ``<prefix>_<section>``; ``<prefix>`` is the name of the directory
    containing the file and spaces in the section name become
    underscores. A further table ``<prefix>_status`` lists the name
    and status of every section that was loaded.

    Arguments
    ---------
    filename : string
        Filename with FASTQC data. Expanded with :func:`glob.glob`,
        so wildcards are permitted.
    '''

    for fn in glob.glob(filename):
        prefix = os.path.basename(os.path.dirname(fn))
        results = []

        # Release the file once all sections have been read; the
        # handle was previously left open for every matching file.
        handle = IOTools.openFile(fn)
        try:
            for name, status, header, data in FastqcSectionIterator(handle):
                # do not collect basic stats, see loadFastQCSummary
                if name == "Basic Statistics":
                    continue

                # fresh default options for every table
                parser = CSV2DB.buildParser()
                options, _ = parser.parse_args([])
                options.tablename = prefix + "_" + re.sub(" ", "_", name)
                options.allow_empty = True

                inf = cStringIO.StringIO("\n".join([header] + data) + "\n")
                CSV2DB.run(inf, options)
                results.append((name, status))
        finally:
            handle.close()

        # load status table
        parser = CSV2DB.buildParser()
        options, _ = parser.parse_args([])
        options.tablename = prefix + "_status"
        options.allow_empty = True

        inf = cStringIO.StringIO(
            "\n".join(["name\tstatus"] +
                      ["\t".join(x) for x in results]) + "\n")
        CSV2DB.run(inf, options)
Example #5
0
def loadFastqc(infile, outfile):
    '''load FASTQC stats.

    The track name is obtained from `infile` via ``P.snip`` with the
    suffix ``.fastqc``. All files matching
    ``<exportdir>/fastqc/<track>*_fastqc/fastqc_data.txt`` are then
    read, where ``<exportdir>`` is ``PARAMS["exportdir"]``. Every
    section except "Basic Statistics" is passed to ``CSV2DB.run`` with
    the table name ``<prefix>_<section>`` (``<prefix>`` being the name
    of the directory holding the file, spaces replaced by
    underscores), followed by a ``<prefix>_status`` table with the
    name and status of each loaded section.

    Arguments
    ---------
    infile : string
        Filename from which the track name is derived.
    outfile : string
        Filename handed to ``P.touch`` once all data has been loaded
        (presumably a sentinel file for the pipeline - TODO confirm).
    '''

    track = P.snip(infile, ".fastqc")

    filename = os.path.join(PARAMS["exportdir"], "fastqc",
                            track + "*_fastqc", "fastqc_data.txt")

    for fn in glob.glob(filename):
        prefix = os.path.basename(os.path.dirname(fn))
        results = []

        # Close the handle explicitly instead of leaving one open file
        # behind for every report that is processed.
        handle = IOTools.openFile(fn)
        try:
            for name, status, header, data in FastqcSectionIterator(handle):
                # do not collect basic stats, see loadFastQCSummary
                if name == "Basic Statistics":
                    continue

                # fresh default options for every table
                parser = CSV2DB.buildParser()
                options, _ = parser.parse_args([])
                options.tablename = prefix + "_" + re.sub(" ", "_", name)
                options.allow_empty = True

                inf = cStringIO.StringIO("\n".join([header] + data) + "\n")
                CSV2DB.run(inf, options)
                results.append((name, status))
        finally:
            handle.close()

        # load status table
        parser = CSV2DB.buildParser()
        options, _ = parser.parse_args([])
        options.tablename = prefix + "_status"
        options.allow_empty = True

        inf = cStringIO.StringIO(
            "\n".join(["name\tstatus"] +
                      ["\t".join(x) for x in results]) + "\n")
        CSV2DB.run(inf, options)

    P.touch(outfile)
Example #6
0
def loadFastqc(infile, outfile):
    '''load FASTQC stats.

    The track name is obtained from `infile` via ``P.snip`` with the
    suffix ``.fastqc``. All files matching
    ``<exportdir>/fastqc/<track>*_fastqc/fastqc_data.txt`` are parsed,
    where ``<exportdir>`` is ``PARAMS["exportdir"]``. Every module
    found in a file is passed to ``CSV2DB.run`` with the table name
    ``<prefix>_<module>`` (``<prefix>`` being the name of the
    directory holding the file, spaces replaced by underscores),
    followed by a ``<prefix>_status`` table listing the name and
    status of each module.

    Arguments
    ---------
    infile : string
        Filename from which the track name is derived.
    outfile : string
        Filename handed to ``P.touch`` once all data has been loaded
        (presumably a sentinel file for the pipeline - TODO confirm).
    '''

    track = P.snip(infile, ".fastqc")

    def section_iterator(infile):
        '''yield (name, status, header, data) for each module.

        A line starting with ``>>`` opens a module and supplies its
        tab-separated name and status, a line starting with ``#`` sets
        the header, ``>>END_MODULE`` closes the module and any other
        line is collected as a data row. Empty tab-separated fields
        are removed from header and data lines.
        '''
        # NOTE(review): ``header`` is not reset when a new module
        # starts, so a module without a "#" line is yielded with the
        # header seen last - confirm that this is intended.
        data = []
        for line in infile:
            if line.startswith(">>END_MODULE"):
                yield name, status, header, data
            elif line.startswith(">>"):
                # drop the leading ">>" and the final character of
                # the line (assumed to be the newline)
                name, status = line[2:-1].split("\t")
                data = []
            elif line.startswith("#"):
                header = "\t".join(
                    [x for x in line[1:-1].split("\t") if x != ""])
            else:
                data.append("\t".join(
                    [x for x in line[:-1].split("\t") if x != ""]))

    filename = os.path.join(PARAMS["exportdir"], "fastqc",
                            track + "*_fastqc", "fastqc_data.txt")

    for fn in glob.glob(filename):
        prefix = os.path.basename(os.path.dirname(fn))
        results = []

        # Close the handle explicitly instead of leaving one open file
        # behind for every report that is processed.
        handle = IOTools.openFile(fn)
        try:
            for name, status, header, data in section_iterator(handle):

                # fresh default options for every table
                parser = CSV2DB.buildParser()
                options, _ = parser.parse_args([])
                options.tablename = prefix + "_" + re.sub(" ", "_", name)
                options.allow_empty = True

                inf = cStringIO.StringIO("\n".join([header] + data) + "\n")
                CSV2DB.run(inf, options)
                results.append((name, status))
        finally:
            handle.close()

        # load status table
        parser = CSV2DB.buildParser()
        options, _ = parser.parse_args([])
        options.tablename = prefix + "_status"
        options.allow_empty = True

        inf = cStringIO.StringIO(
            "\n".join(["name\tstatus"] +
                      ["\t".join(x) for x in results]) + "\n")
        CSV2DB.run(inf, options)

    P.touch(outfile)
Example #7
0
def loadFastqc(filename,
               backend="sqlite",
               database="csvdb",
               host="",
               username="",
               password="",
               port=3306):
    '''load FASTQC statistics into database.

    Each section will be uploaded to its own table. Tables are called
    ``<prefix>_<section>``, with ``<prefix>`` being the name of the
    directory that contains the data file and spaces in the section
    name replaced by underscores. The section "Basic Statistics" is
    skipped. For every file, a table ``<prefix>_status`` with the
    columns ``name`` and ``status`` is loaded as well, holding one row
    per uploaded section.

    Arguments
    ----------
    filename : string
        Filename with FASTQC data. The value is expanded with
        :func:`glob.glob`, so it may contain wildcards.
    backend : string
        Database backend. Only this is required for an sqlite database.
    database : string
        Database name.
    host : string
        Database host name
    username : string
        Database user name
    password : string
        Database password
    port : int
        Database server port.
    '''

    parser = CSV2DB.buildParser()
    # parse an empty command line to obtain the parser defaults
    options, _ = parser.parse_args([])

    options.database_backend = backend
    options.database_host = host
    options.database_name = database
    options.database_username = username
    options.database_password = password
    options.database_port = port
    options.allow_empty = True

    for fn in glob.glob(filename):
        prefix = os.path.basename(os.path.dirname(fn))
        results = []

        # The handle used to stay open after parsing; close it
        # explicitly so that open files do not pile up across matches.
        handle = IOTools.openFile(fn)
        try:
            for name, status, header, data in FastqcSectionIterator(handle):
                # do not collect basic stats, see loadFastQCSummary
                if name == "Basic Statistics":
                    continue

                options.tablename = prefix + "_" + re.sub(" ", "_", name)

                inf = cStringIO.StringIO("\n".join([header] + data) + "\n")
                CSV2DB.run(inf, options)
                results.append((name, status))
        finally:
            handle.close()

        # load status table
        options.tablename = prefix + "_status"

        inf = cStringIO.StringIO(
            "\n".join(["name\tstatus"] +
                      ["\t".join(x) for x in results]) + "\n")
        CSV2DB.run(inf, options)