Ejemplo n.º 1
0
def _merge_unique(accumulated, new_items):
    """Fold ``new_items`` into ``accumulated`` without adding duplicates.

    When ``accumulated`` is None the ``new_items`` object itself is returned
    (not a copy), so later merges append to that first list in place.
    Membership is tested with ``in``, so items only need to support equality.
    """
    if accumulated is None:
        return new_items
    for item in new_items:
        if item not in accumulated:
            accumulated.append(item)
    return accumulated


def main(argv):
    """Run the annotation pipeline over every configured input path.

    Steps, in order:
      1. Attach a file handler (/tmp/myapp.log) to the 'myapp' logger.
      2. Parse ``argv`` with ``extractConfigurations``.
      3. For each input path: read the file, track the min/max lengths via
         ``checkLength``, annotate it, and accumulate the de-duplicated
         magazines and annotation units. A failure on one path is reported
         and the loop continues with the next path.
      4. Write the text output, rank candidates, optionally apply weights,
         and emit RDF/XML/CSV outputs plus annotator statistics.

    Args:
        argv: command-line arguments forwarded to ``extractConfigurations``.
    """
    # Logging setup.
    logger = logging.getLogger('myapp')
    hdlr = logging.FileHandler('/tmp/myapp.log')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.DEBUG)

    startTime = datetime.now()
    logger.info("Application execution started at %s", startTime)

    # Retrieve options for this job.
    (configfile, filepattern, inputfilepath, csvlogging, target_format,
     source_file, source_format, special) = extractConfigurations(argv)

    momentum = datetime.now()
    # Time spent on setup/configuration; handed to readFile below.
    now = momentum - startTime
    units = None
    counter = 1
    annotator = Annotator(None, configfile)

    full_list_magazines = None
    minm = 0
    maxm = 0
    print(filepattern)
    print(inputfilepath)
    print(configfile)

    for path in inputfilepath:
        try:
            mags = None
            if special == "kata":
                # Only the magazines are used; the second value is replaced
                # by readFile's results right below.
                mags, _ = readKataMagazines(logger)

            # Named file_min/file_max so the builtins min()/max() are not
            # shadowed for the rest of the function.
            results, magazines, file_min, file_max = readFile(
                logger, filepattern, now, path, mags)
            minm, maxm = checkLength(file_min, minm, maxm, logger)
            minm, maxm = checkLength(file_max, minm, maxm, logger)

            u = annotator.doAnnotationWithConfig(results, mags, magazines,
                                                 csvlogging, units)

            full_list_magazines = _merge_unique(full_list_magazines,
                                                magazines)
            units = _merge_unique(units, u)

            logger.info("Processed %s/%s", counter, len(inputfilepath))
            counter = counter + 1
        except Exception as e:
            # Best-effort batch: report the failing path and keep going.
            print("Error happened during execution: " + str(path))
            print("Error happened during execution: " + str(e))
            # The message needs a placeholder for every extra argument;
            # without one, logging fails to format the record and drops it.
            logger.warning("Unexpected error while processing data %s: %s",
                           path, e)
            error = traceback.format_exc()
            print(error.upper())

    if full_list_magazines is not None:
        writeTextOutput(full_list_magazines)
    else:
        logger.error("magaine list is empty!")

    # NOTE(review): doRanking() is used here as a single value but unpacked
    # as a 2-tuple further down; confirm its return contract and what
    # logCandicates expects for `rank`.
    rank = annotator.doRanking()
    tfidf, limit = logCandicates(logger, full_list_magazines, rank)
    if units is not None:
        print("Check ranking" + str(limit))
        rank, rank_range = annotator.doRanking()
        print(len(rank))
        if len(rank) > 0:
            limits = (minm, maxm)
            logger.info("Execute candidate ranking for %s %s", rank, limit)
            apply_weights(units, tfidf, rank, logger, limits, rank_range,
                          limit)
        print("convert to rdf")
        writeResultsToRDF(units, annotator, counter, target_format,
                          source_file, source_format)
        writeXmlOutput(full_list_magazines)
        writeCSVOutputOfResults(full_list_magazines)
        annotator.writeToCSV(full_list_magazines)
        annotator.logConseptsByIndex(full_list_magazines)

    annotator.print_filtered_terms(full_list_magazines)
    annotator.print_included_terms(full_list_magazines)
    annotator.print_stats(full_list_magazines)

    now = datetime.now() - momentum
    end = datetime.now() - startTime

    print("Finished queries in " + str(now))
    print("REACHED THE END in " + str(end))
    logger.info("Application execution ended, and it lasted for %s", end)