Python getConfiguredOutputs Examples

Programming Language: Python

Namespace/Package Name: output

Method/Function: getConfiguredOutputs

Examples at hotexamples.com: 4

Python getConfiguredOutputs - 4 examples found. These are the top-rated real-world Python examples of output.getConfiguredOutputs extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: cooccurrences.py Project: moma/easiparse

def _compile_forms_regex(forms):
    """
    Build a case-insensitive regex matching any of `forms` as a whole word.

    Every form is escaped so that it is matched literally, and the
    alternation is grouped so that the word boundaries apply to each
    alternative (an ungrouped r"\ba|b\b" only anchors the first and the last
    alternative).

    Returns None when `forms` is empty: an empty alternation would otherwise
    match almost any text.
    """
    alternatives = [re.escape(form) for form in forms]
    if not alternatives:
        return None
    return re.compile(r"\b(?:%s)\b" % "|".join(alternatives), re.I | re.M | re.U)


def search_subworker(config, content, year, doublet):
    """
    Responsible for matching the pair and incrementing cooccurrences count

    Parameters:
        config: configuration mapping; its 'cooccurrences' section is handed
            to output.getConfiguredOutputs().
        content: text in which both n-grams of the pair are searched.
        year: string used as prefix of the cooccurrence '_id'.
        doublet: pair of n-gram mappings; each one provides 'label', 'id' and
            its label forms as the keys of ['edges']['label'].

    When both n-grams are found, the counter stored under "year_id1_id2" is
    incremented (created with value 1 if missing), unless only the mirrored
    "year_id2_id1" document exists, in which case that one is incremented.
    Nothing is written when either n-gram is absent or has no label form.
    """
    first, second = doublet[0], doublet[1]
    logging.debug("looking for cooc of %s and %s", first['label'], second['label'])
    outputs = output.getConfiguredOutputs(config['cooccurrences'])
    regex1 = _compile_forms_regex(first['edges']['label'].keys())
    regex2 = _compile_forms_regex(second['edges']['label'].keys())

    if regex1 is None or regex2 is None:
        return
    if regex1.search(content) is None or regex2.search(content) is None:
        return

    logging.debug("found a cooc !")
    # the pair may already be stored under either ordering of the two ids
    doublet_id12 = "_".join((year, first["id"], second["id"]))
    doublet_id21 = "_".join((year, second["id"], first["id"]))

    # NOTE(review): presumably a PyMongo collection (legacy update() API) --
    # confirm against the 'mongodb' output handler
    coocmatrix = outputs['mongodb'].mongodb.coocmatrix
    if coocmatrix.find_one({'_id': doublet_id12}) is None \
            and coocmatrix.find_one({'_id': doublet_id21}) is not None:
        target_id = doublet_id21
    else:
        # also covers the brand new pair: it is created under the 'id12' ID
        target_id = doublet_id12

    # The update document holds the $inc operator only: mixing a plain '_id'
    # field with an operator is not a valid update. On upsert the '_id' is
    # taken from the query and 'value' starts at 1.
    coocmatrix.update({'_id': target_id}, {'$inc': {'value': 1}}, upsert=True)

Example #2

Show file

File: cooccurrences.py Project: moma/easiparse

def main(config):
    """
    Entry point of the cooccurrences processing.

    Loads the whitelist whose path is given in config['cooccurrences'],
    writes one "id,label" line per n-gram to the 'exportwhitelistcsv' output
    while collecting the n-grams in the whitelist 'content', then calls
    worker() on every notice of the input database, one after the other.
    """
    section = config['cooccurrences']
    whitelistpath = section["whitelist"]["path"]
    logging.debug("loading whitelist from %s (id = %s)", whitelistpath, whitelistpath)

    importer = Reader('whitelist://' + whitelistpath, dialect="excel", encoding="ascii")
    importer.whitelist = whitelist.Whitelist(whitelistpath, whitelistpath)
    imported = importer.parse_file()
    imported['content'] = []
    # iterator over the n-grams of the imported whitelist
    ngrams = imported.getNGram()
    outputs = output.getConfiguredOutputs(section)
    try:
        # the loop only ends when the iterator raises StopIteration
        while True:
            ngid, ng = ngrams.next()
            imported['content'] += [ng]
            line = "%s,%s\n" % (ngid, ng['label'])
            outputs['exportwhitelistcsv'].save(line)
    except StopIteration:
        logging.debug(
            'imported %d n-lemmes from the whitelist file %s',
            len(imported['content']),
            whitelistpath,
        )

    notices_db = mongodbhandler.MongoDB(section['input_db'])
    # notices are processed sequentially, in this process
    cursor = notices_db.notices.find(timeout=False)
    for notice in cursor:
        worker(config, notice, imported)

Example #3

Show file

File: cooccurrences.py Project: moma/easiparse

def exportcooc(config):
    """
    Dump the stored cooccurrences to the 'coocmatrixcsv' output.

    Every document of the coocmatrix collection produces one
    "ngi,ngj,count,year" line; the year and the two n-gram ids are obtained
    by splitting the document '_id' on underscores.
    """
    outputs = output.getConfiguredOutputs(config['cooccurrences'])
    coocmatrix = outputs['mongodb'].mongodb.coocmatrix
    for document in coocmatrix.find():
        # '_id' must split into exactly three parts: year, first id, second id
        year, first_id, second_id = document['_id'].split("_")
        row = (first_id, second_id, document['value'], year)
        outputs['coocmatrixcsv'].save("%s,%s,%d,%s\n" % row)

Example #4

Show file

File: extractor.py Project: moma/easiparse

def extract_worker(config, fieldname):
    """
    Copy the input db notices whose `fieldname` matches the configured
    regexp to the "notices" collection of the 'mongodb' output.
    """
    settings = config['extractor']
    source_db = mongodbhandler.MongoDB(settings['input_db'])
    outputs = output.getConfiguredOutputs(settings)
    pattern = settings['filters']['regexp_content']['regexp']
    matcher = re.compile(pattern, re.I | re.U | re.M)

    # the compiled regexp is handed to the database as a $regex filter
    query = {fieldname: {"$regex": matcher}}
    for notice in source_db.notices.find(query, timeout=False):
        outputs['mongodb'].save(notice, "notices")