def _merge_unique(accumulated, items):
    """Append every element of *items* that is not already in *accumulated*.

    When *accumulated* is ``None`` the *items* object itself is returned
    (no copy is made), which seeds the accumulator on first use.
    Membership is tested with ``in``, so elements need not be hashable.
    """
    if accumulated is None:
        return items
    for item in items:
        if item not in accumulated:
            accumulated.append(item)
    return accumulated


def main(argv):
    """Run one annotation job end to end.

    Steps, in order:

    1. Attach a file handler (``/tmp/myapp.log``, DEBUG level) to the
       ``'myapp'`` logger.
    2. Read the job options from *argv* via ``extractConfigurations``.
    3. For every entry of the configured input paths: read it, track the
       running min/max via ``checkLength``, annotate it, and merge the
       resulting magazines and units into job-wide lists without
       duplicates.  A failure on one input is reported and the loop moves
       on to the next input.
    4. Write the text output, compute ranking / candidate data, apply
       weights, and emit the RDF, XML and CSV outputs plus statistics.
    5. Print and log the elapsed times.

    Args:
        argv: Command-line arguments, passed unchanged to
            ``extractConfigurations``.

    Returns:
        None.
    """
    # NOTE(review): the original source had lost its indentation; the
    # nesting below is reconstructed from data flow and should be
    # confirmed against the author's intent where marked.

    # Logging setup.
    logger = logging.getLogger('myapp')
    hdlr = logging.FileHandler('/tmp/myapp.log')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.DEBUG)

    start_time = datetime.now()
    logger.info("Application execution started at %s", start_time)

    # Retrieve options for this job.
    (configfile, filepattern, inputfilepath, csvlogging, target_format,
     source_file, source_format, special) = extractConfigurations(argv)

    momentum = datetime.now()
    # Time spent on setup; handed to readFile() for every input.
    setup_elapsed = momentum - start_time

    units = None
    counter = 1
    annotator = Annotator(None, configfile)
    full_list_magazines = None
    minm = 0
    maxm = 0

    print(filepattern)
    print(inputfilepath)
    print(configfile)

    for path in inputfilepath:
        try:
            mags = results = None
            if special == "kata":
                mags, results = readKataMagazines(logger)

            # Local names avoid shadowing the builtins min() / max().
            results, magazines, file_min, file_max = readFile(
                logger, filepattern, setup_elapsed, path, mags)
            minm, maxm = checkLength(file_min, minm, maxm, logger)
            minm, maxm = checkLength(file_max, minm, maxm, logger)

            u = annotator.doAnnotationWithConfig(
                results, mags, magazines, csvlogging, units)

            # Accumulate job-wide results, skipping entries already present.
            full_list_magazines = _merge_unique(full_list_magazines, magazines)
            units = _merge_unique(units, u)

            logger.info("Processed %s/%s", counter, len(inputfilepath))
            counter = counter + 1
        except Exception as e:
            # Per-input boundary: report the failure and keep going so one
            # bad input does not abort the whole job.
            print("Error happened during execution: " + str(path))
            print("Error happened during execution: " + str(e))
            # Placeholders are required here: passing the exception as a
            # bare extra argument makes the logging module fail to format
            # the record.
            logger.warning(
                "Unexpected error while processing data %s: %s", path, e)
            error = traceback.format_exc()
            print(error.upper())

    if full_list_magazines is not None:
        writeTextOutput(full_list_magazines)
    else:
        logger.error("magaine list is empty!")

    # NOTE(review): this first doRanking() result is passed on whole, while
    # the second call below is unpacked into two values -- confirm that
    # logCandicates() expects the un-unpacked return value.
    rank = annotator.doRanking()
    tfidf, limit = logCandicates(logger, full_list_magazines, rank)

    if units is not None:
        print("Check ranking" + str(limit))
        rank, rank_range = annotator.doRanking()
        print(len(rank))
        if len(rank) > 0:
            limits = (minm, maxm)
            logger.info("Execute candidate ranking for %s %s", rank, limit)
            # Variant that uses ranges (rank_range and limit are passed).
            apply_weights(units, tfidf, rank, logger, limits, rank_range, limit)

        # NOTE(review): RDF export is assumed to sit under the units guard
        # because it consumes `units`; verify.
        print("convert to rdf")
        writeResultsToRDF(units, annotator, counter, target_format,
                          source_file, source_format)

    # NOTE(review): the remaining writers are assumed to run
    # unconditionally; verify against the intended nesting.
    writeXmlOutput(full_list_magazines)
    writeCSVOutputOfResults(full_list_magazines)
    annotator.writeToCSV(full_list_magazines)
    annotator.logConseptsByIndex(full_list_magazines)
    annotator.print_filtered_terms(full_list_magazines)
    annotator.print_included_terms(full_list_magazines)
    annotator.print_stats(full_list_magazines)

    query_elapsed = datetime.now() - momentum
    end = datetime.now() - start_time
    print("Finished queries in " + str(query_elapsed))
    print("REACHED THE END in " + str(end))
    logger.info("Application execution ended, and it lasted for %s", end)