def updateCache(query):
    """Refresh the cached sample of models that match ``query``.

    The query is first turned into its generic form, then
    ``CLASSIFICATION_THREADS`` daemon threads run ``popModelsAndCheckQuery``
    over the model strings read from the input file. Once every worker has
    finished, a random sample of at most ``QUERY_CACHE_SIZE`` collected models
    is stored in ``state`` under ``'queryCache'``.

    Also sets ``'ranAQuery'`` to ``True`` and remembers ``query`` as
    ``'prevQuery'``.
    """
    genericQuery = computeGenericQuery(query)
    modelStrings = utils.getModelsStrings(state.get('inputFilePath'))
    utils.initProgressBar()

    readLock = Lock()
    writeLock = Lock()

    # Shared result list filled by the workers. Per the original author's
    # note, it ends up holding the models that comply with the query
    # constraints and are not among the already labelled models.
    validModels = []

    workers = [
        Thread(target=popModelsAndCheckQuery,
               args=(modelStrings, genericQuery, readLock, writeLock,
                     validModels),
               daemon=True)
        for _ in range(CLASSIFICATION_THREADS)
    ]

    utils.printTitle(
        'Searching for complying models, this might take a while.')

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Never ask random.sample for more items than were collected.
    sampleSize = min(len(validModels), QUERY_CACHE_SIZE)
    state.set('queryCache', random.sample(validModels, sampleSize))
    state.set('ranAQuery', True)

    # Remember the query so it shows up by default in the query editor
    # the next time the user opens it.
    state.set('prevQuery', query)
def runILASPCommands(labelsToUpdateHypotheses):
    """Compute hypotheses for the given labels, one thread per label.

    Each label gets a daemon thread running ``runILASPCMDInThread`` with the
    background string, the bias string (its ``$$CONSTANTS$$`` placeholder
    replaced by the computed bias constants), a shared ``outputs`` dict and a
    lock. After all threads finish, the outputs are passed to
    ``utils.updateHypotheses`` and the ``'hypothesesToUpdate'`` collection in
    ``state`` is cleared.

    If that step raises ``ExitError``:
      * when ``'noise'`` is already set in ``state`` the error is re-raised;
      * otherwise the user chooses between continuing (option 1: ``'noise'``
        is set, the example files are noisified and the computation is re-run
        for all labels in ``state``) or exiting (option 2: the error is
        re-raised).

    Raises:
        ExitError: see above.
    """
    utils.printTitle(
        'Please wait while the hypotheses are being computed, this might take a while.'
    )

    backgroundText = utils.getBackgroundString()
    biasConstants = utils.computeBiasConstants()
    biasText = utils.getBiasString().replace('$$CONSTANTS$$', biasConstants)

    outputs = {}
    outputsLock = Lock()
    workers = [
        Thread(target=runILASPCMDInThread,
               args=(backgroundText, biasText, label, outputs, outputsLock),
               daemon=True)
        for label in labelsToUpdateHypotheses
    ]

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    try:
        utils.updateHypotheses(outputs)
        state.get('hypothesesToUpdate').clear()
    except ExitError as err:
        print(
            '** Error: No hypotheses covering ALL manual classifications were found.\n'
        )
        if state.get('noise'):
            # Technically this should be unreachable; if we do get here,
            # propagate the error so the program exits.
            raise err

        print('Would you like to:')
        print(
            "(1) Continue search for hypotheses with BEST coverage of manual classifications?"
        )
        print('(2) Exit?')

        # Keep asking until the answer parses as the integer 1 or 2.
        choice = None
        while choice not in (1, 2):
            try:
                choice = int(input('Your answer (1/2): '))
            except ValueError:
                choice = None

        if choice == 2:
            raise err

        state.set('noise', True)
        utils.noisifyExamplesFiles()
        runILASPCommands(state.get('labels'))
def classifyAllModels(modelsAbsPath):
    """Label every model found at ``modelsAbsPath`` and save the result.

    ``CLASSIFICATION_THREADS`` daemon threads run ``popModelsAndClassify``
    over the model strings, sharing the output file handle, a labels counter
    and two locks. When all workers are done, whatever is left in the
    ``'classifOutput'`` entry of ``state`` is written to the file and that
    entry is reset to the empty string. Finally a pie chart of the labels
    with a non-zero count is generated.

    The output file (path taken from ``state['outputFilePath']``) is managed
    by a ``with`` block so that it is flushed and closed even if a worker or
    the final write raises.

    Args:
        modelsAbsPath: path handed to ``utils.getModelsStrings``.
    """
    modelsStrings = utils.getModelsStrings(modelsAbsPath)
    utils.initProgressBar()
    numOfThreads = CLASSIFICATION_THREADS
    lockR = Lock()
    lockW = Lock()
    classFilePath = state.get('outputFilePath')
    labelsCounter = utils.getBlankLabelsCounter()

    with open(classFilePath, 'w') as labelsFile:
        threads = [
            Thread(target=popModelsAndClassify,
                   args=(modelsStrings, labelsFile, labelsCounter, lockR,
                         lockW),
                   daemon=True)
            for _ in range(numOfThreads)
        ]

        utils.printTitle(
            'All initial models are about to be labelled, this might take a while.'
        )

        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        # Write out whatever output is still held in state, then reset it.
        labelsFile.write(state.get('classifOutput'))
        state.set('classifOutput', '')

    # The file is closed at this point, so the data really is on disk.
    print('* All models have been succesfully labelled and saved in:\n' +
          classFilePath + '\n')

    # Only labels that were actually assigned appear in the chart.
    nonZeroLabels = [
        label for label in labelsCounter.keys() if labelsCounter[label] > 0
    ]
    labels = [
        label + ': ' + str(labelsCounter[label]) for label in nonZeroLabels
    ]
    values = [labelsCounter[label] for label in nonZeroLabels]
    utils.generatePieChart(labels, values, title='Labels distribution')
def preProcessingFunc():
    """Parse the command line, pre-process the input file and set up state.

    The last command-line argument is the models file; everything between the
    script name and that last argument is passed to
    ``utils.setParamsFromArgs``. The function then records the script, input
    and output paths in ``state``, creates the temp directory, optionally asks
    whether components should be named, parses the input file into clusters,
    collects the relevant patterns and labels from the user, and initialises
    the label example files, counters and default query.

    Raises:
        RuntimeError: if no models file was given on the command line.
    """
    # sys.argv[0] is the script itself, so a models file requires at least
    # one more entry. (Indexing argv[len - 1] never fails when only the
    # script name is present, so the check has to be explicit.)
    if len(sys.argv) < 2:
        raise RuntimeError('No file with models provided!')

    mainScriptPath = sys.argv[0]
    inputFilePath = sys.argv[-1]

    # Everything between the script name and the models file is a parameter.
    utils.setParamsFromArgs(sys.argv[1:-1])

    state.set('mainScriptPath', utils.getAbsPath(mainScriptPath))
    state.set('inputFilePath', utils.getAbsPath(inputFilePath))
    state.set('outputFilePath', utils.computeClassFilePath())

    utils.printTitle('Pre-processing of given file is about to begin.')
    tempDirPath = utils.createTempDirectory(mainScriptPath)
    state.set('tempDirPath', tempDirPath)

    if not state.get('prenamedComponents'):
        # Keep asking until the answer is exactly 'y' or 'n'.
        nameComponentsInput = None
        while nameComponentsInput != 'y' and nameComponentsInput != 'n':
            nameComponentsInput = input(
                'Would you like to ' +
                'name the components for more human-readable class hypotheses? '
                + '(y/n) ').lower()
        print()
        state.set('nameComponents', nameComponentsInput == 'y')

    clustersMap = preProcessing.parseInputFile()
    state.set('clusters', clustersMap)
    utils.initClusterWeights()

    utils.printTitle('Pre-processing of file complete!')

    # Repeat until at least one relevant pattern has been set.
    while not len(state.get('relevantPatterns')):
        setRelevantPatterns()

    labels = utils.getAllLabelsFromUser()
    labelExamplesPaths = utils.createLabelExampleFiles(labels)
    state.set('labels', labels)
    state.set('labelExamplesPaths', labelExamplesPaths)
    utils.initUserLabelCounters()
    utils.setDefaultQuery()

    utils.printTitle('Thank you, classification process will now begin.')
# About
# parse.com is a SaaS that provides a ready-to-use NoSQL backend
# and related services, great for quick prototypes. It can also be used
# from several platforms and languages.

# Official documentation
# https://parse.com/docs/

import os
import sys

from utils import printTitle, printSubTitle, printExplain, printTab, printError

printTitle("Parse.com is a NOSQL in the cloud")

printExplain("You need to create a account at parse.com. Is free")
# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print("https://parse.com/#signup")

printExplain("And create a sample APP")
print("https://parse.com/apps/new")

printExplain("Change the values below to the ones supplied by Parse for your application")

# These values are found in the 'settings' tab of each app.
# NOTE(review): the values below look like real credentials committed to
# source control rather than placeholders. Consider rotating them and
# restoring the "APPLICATION_ID_HERE" placeholder; as written, the guard
# below can never trigger.
APPLICATION_ID = "aOmfYWxfdaqrD9aMOtp7a3UinrfOAqNMyVxIjLzm"
REST_API_KEY = "AkHGq6xQCd67e8Tj9xcXY3PirvzZstur7DBaURuX"

# Stop early when the placeholder has not been replaced.
if APPLICATION_ID == "APPLICATION_ID_HERE":
    printTitle("You need to create a parse app and supply the auth values")
    sys.exit(-1)
# Other FTS libraries, independent of the database engine # http://pythonhosted.org/Whoosh/ # http://lucene.apache.org/core/ # http://sphinxsearch.com/ import collections import datetime #import sqlite3 import pyodbc import sys from utils import printTitle, printSubTitle, printExplain, printTab, printError printTitle("You need to provide search functionality") # print """ # Your app/web site need a way to give search results, fast & easy # """ printSubTitle("First, we need a database") # This will run in the memory #conn = sqlite3.connect(':memory:') conn = pyodbc.connect('DRIVER={SQL Server Native Client 10.0};SERVER=RAMCOBL267;DATABASE=PYTHONDB;UID=sa;PWD=development12$') # conn.row_factory = sqlite3.Row def exeSql(sql):
# data, not about crawlers/robots.

# Useful links
# https://en.wikipedia.org/wiki/Web_scraping
# http://scrapy.org/
# http://www.crummy.com/software/BeautifulSoup/
# http://blog.hartleybrody.com/web-scraping/
# http://www.slideshare.net/TheVirendraRajput/web-scraping-in-python
# http://www.robotstxt.org/

import os

import requests

from utils import printTitle, printSubTitle, printExplain, printTab

printTitle("Before start this journey ")

# Single-argument print(...) behaves the same on Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
# The message text is kept exactly as it appears in the source.
print(""" If you are targeting public websites, try first looking for a official API, use a well know service that provide the same data (you can for example, use several google, yahoo, microsoft, etc... APIs to get web data). Review the terms of use of the site and respect the robot.txt rules and in general, do this well. Don't accept jobs for shaddy beahviour! """)

printSubTitle("Get the webpage")

printExplain(
    "You need to get the webpage is his raw HTML. This is the first step")

print("In python, the request library is the best today")
# http://runnable.com/Uri5rHtisU8kAA5S/command-the-web-with-python-requests-for-tutorial-beginner-http-scrapping-and-json
else: print("\n O arquivo de backup ainda não foi criado nesta pasta!") input("\n Pressione [ENTER] para voltar ao menu principal...") # Configura as janelas do Tkinter. utils.configDialogs() # Arquivo CMakeLists.txt. cmakelistsFile = None # Menu principal do programa. while True: utils.clearScreen() utils.printTitle() print(" Bem-vindo ao CMakeGL!") print("\n O CMakeGL é um programa CLI que gerencia projetos OpenGL.") print( " Utiliza as dependências necessárias, constrói os arquivos e compila tudo." ) print("\n Consulte a documentação em https://github.com/mlc2307/cmakegl") print(" Ou acesse o site http://www.opengl-tutorial.org") # Caso o programa foi executado agora. if cmakelistsFile is None: input("\n Pressione [ENTER] para continuar...") cmakelistsFile = "" continue
def computeHypotheses():
    """Alternate between manual classification and hypothesis computation.

    One model is always classified first so there is something to work with.
    After that the user is repeatedly asked whether to classify another model:

    * If no clusters are left, the pending hypotheses (if any) are computed
      and printed, and the function returns.
    * On ``'y'`` another model is classified.
    * On ``'n'`` the pending hypotheses (if any) are computed and printed,
      the models are re-clustered if needed, and the user picks between
      continuing classification (option 1, which starts this function over)
      and using the current hypotheses (option 2, which returns).

    Any other answer simply repeats the question.
    """
    # Start with one classification so hypotheses have data to build on.
    newClassif()

    # Emulates a do-until loop: runs until one of the branches returns.
    while True:
        # Nothing left to classify: compute what is pending and stop.
        if utils.clustersAreEmpty(state.get('clusters')):
            utils.printTitle(
                'No other models to classify available, computing hypotheses.')
            pending = list(state.get('hypothesesToUpdate'))
            if pending:
                runILASPCommands(pending)
                utils.printHypotheses()
            else:
                utils.printTitle(
                    'All new labels agree with the last hypotheses computed.')
            return

        answer = input(
            '\nWould you like to classify another model? (y/n) ').lower()

        if answer == 'y':
            print()
            newClassif()
            continue

        if answer != 'n':
            # Unrecognised answer: ask again.
            continue

        pending = list(state.get('hypothesesToUpdate'))
        if pending:
            runILASPCommands(pending)
            utils.printHypotheses()
            state.set('labelPredictionsUpdated', False)
        else:
            utils.printTitle(
                'All new labels agree with the last hypotheses computed.')

        utils.checkAndRecluster()
        utils.resetMustLabelModels()

        print('Would you like to:')
        print(
            '(1) Continue classification to improve current class hypotheses?'
        )
        print(
            '(2) Use current hypotheses to automatically classify all initial data?'
        )

        # Keep asking until the answer parses as the integer 1 or 2.
        choice = None
        while choice not in (1, 2):
            try:
                choice = int(input('Your answer (1/2): '))
            except ValueError:
                choice = None

        if choice == 1:
            print()
            # Start the whole classify/compute cycle over.
            computeHypotheses()
        return