Exemple #1
0
def updateCache(query):
    """Refresh the query cache with a random sample of models matching `query`.

    A pool of CLASSIFICATION_THREADS daemon threads runs
    `popModelsAndCheckQuery` over the model strings read from the input file;
    the threads collect their results into one shared list. Once every thread
    has finished, at most QUERY_CACHE_SIZE of the collected models are sampled
    at random and stored in the state under 'queryCache'. The state entries
    'ranAQuery' and 'prevQuery' are updated as well.
    """
    genericQuery = computeGenericQuery(query)
    modelStrings = utils.getModelsStrings(state.get('inputFilePath'))
    utils.initProgressBar()
    rLock = Lock()
    wLock = Lock()

    # Filled by the worker threads with every model that is 'valid', i.e. that
    # complies with the query constraints and is not already labelled
    compliantModels = []

    workers = [
        Thread(target=popModelsAndCheckQuery,
               args=(modelStrings, genericQuery, rLock, wLock,
                     compliantModels),
               daemon=True)
        for _ in range(CLASSIFICATION_THREADS)
    ]
    utils.printTitle(
        'Searching for complying models, this might take a while.')
    for worker in workers:
        worker.start()

    # Wait for the whole pool before reading the shared result list
    for worker in workers:
        worker.join()

    sampleSize = min(len(compliantModels), QUERY_CACHE_SIZE)
    state.set('queryCache', random.sample(compliantModels, sampleSize))
    state.set('ranAQuery', True)

    # Remember the query under 'prevQuery' so that it appears by default in
    # the query editor next time the user wants to use it
    state.set('prevQuery', query)
def runILASPCommands(labelsToUpdateHypotheses):
    """Recompute the hypotheses of the given labels, one thread per label.

    Every thread runs `runILASPCMDInThread` with the background string, the
    bias string (with '$$CONSTANTS$$' substituted), its label, and a shared
    `outputs` dict plus lock. After all threads have finished, `outputs` is
    passed to `utils.updateHypotheses` and the state's 'hypothesesToUpdate'
    collection is cleared.

    If that step raises ExitError:
      * when the state's 'noise' flag is already set, the error is re-raised;
      * otherwise the user chooses between (1) setting 'noise', noisifying the
        examples files and re-running for all labels in the state, or
        (2) exiting, in which case the error is re-raised.
    """
    utils.printTitle(
        'Please wait while the hypotheses are being computed, this might take a while.'
    )

    background = utils.getBackgroundString()
    biasConstants = utils.computeBiasConstants()
    genericBias = utils.getBiasString().replace('$$CONSTANTS$$',
                                                biasConstants)
    outputs = dict()
    sharedLock = Lock()

    workers = [
        Thread(target=runILASPCMDInThread,
               args=(background, genericBias, label, outputs, sharedLock),
               daemon=True)
        for label in labelsToUpdateHypotheses
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    try:
        utils.updateHypotheses(outputs)
        state.get('hypothesesToUpdate').clear()
    except ExitError as exitErr:
        print(
            '** Error: No hypotheses covering ALL manual classifications were found.\n'
        )
        if state.get('noise'):
            # Technically, shouldn't be able to get here;
            # If we do, raise the error in order to fully exit
            raise exitErr

        print('Would you like to:')
        print(
            "(1) Continue search for hypotheses with BEST coverage of manual classifications?"
        )
        print('(2) Exit?')

        # Keep asking until the answer parses as the integer 1 or 2
        choice = None
        while choice not in (1, 2):
            try:
                choice = int(input('Your answer (1/2): '))
            except ValueError:
                continue

        if choice == 2:
            raise exitErr

        # Option 1: switch the 'noise' flag on and retry with every label
        state.set('noise', True)
        utils.noisifyExamplesFiles()
        runILASPCommands(state.get('labels'))
Exemple #3
0
def classifyAllModels(modelsAbsPath):
    """Label every model found in `modelsAbsPath` and save the result.

    The model strings are processed by CLASSIFICATION_THREADS daemon threads
    running `popModelsAndClassify`, which share the output file handle, a
    labels counter and two locks. After all threads have finished, whatever
    is left in the state's 'classifOutput' buffer is written to the file and
    the buffer is reset to ''. Finally a pie chart of the labels with a
    non-zero count is generated.

    Args:
        modelsAbsPath: path handed to `utils.getModelsStrings` to load the
            models to classify.
    """
    modelsStrings = utils.getModelsStrings(modelsAbsPath)
    utils.initProgressBar()

    lockR = Lock()
    lockW = Lock()

    classFilePath = state.get('outputFilePath')
    labelsCounter = utils.getBlankLabelsCounter()

    # The context manager guarantees the output file is closed (and flushed)
    # even if a worker thread or one of the state/utils calls raises
    with open(classFilePath, 'w') as labelsFile:
        threads = [
            Thread(target=popModelsAndClassify,
                   args=(modelsStrings, labelsFile, labelsCounter, lockR,
                         lockW),
                   daemon=True)
            for _ in range(CLASSIFICATION_THREADS)
        ]

        utils.printTitle(
            'All initial models are about to be labelled, this might take a while.'
        )

        for thread in threads:
            thread.start()

        # All workers must be done before the file is finalised and closed
        for thread in threads:
            thread.join()

        labelsFile.write(state.get('classifOutput'))
        state.set('classifOutput', '')

    # Reported only once the file has actually been closed
    print('* All models have been succesfully labelled and saved in:\n' +
          classFilePath + '\n')

    # Only labels that were assigned at least once appear in the chart
    nonZeroLabels = [
        label for label in labelsCounter.keys() if labelsCounter[label] > 0
    ]
    labels = [
        label + ': ' + str(labelsCounter[label]) for label in nonZeroLabels
    ]
    values = [labelsCounter[label] for label in nonZeroLabels]
    utils.generatePieChart(labels, values, title='Labels distribution')
Exemple #4
0
def preProcessingFunc():
    """Parse the command line and prepare the state for classification.

    The last command-line argument is taken as the path of the file with the
    models; every argument between the script path and that last one is
    handed to `utils.setParamsFromArgs`. The function then records the
    script, input and output paths in the state, creates the temporary
    directory, optionally asks whether components should be named, parses the
    input file into clusters, asks for relevant patterns until some are set,
    collects the labels from the user and sets up the default query.

    Raises:
        RuntimeError: if no argument besides the script path was given.
    """
    # sys.argv normally holds at least the script path, so indexing its last
    # element never fails and would silently pick the script itself as the
    # models file; check the argument count explicitly instead
    if len(sys.argv) < 2:
        raise RuntimeError('No file with models provided!')

    mainScriptPath = sys.argv[0]
    inputFilePath = sys.argv[-1]

    # Everything between the script path and the models file is a parameter
    utils.setParamsFromArgs(sys.argv[1:-1])

    state.set('mainScriptPath', utils.getAbsPath(mainScriptPath))
    state.set('inputFilePath', utils.getAbsPath(inputFilePath))
    state.set('outputFilePath', utils.computeClassFilePath())

    utils.printTitle('Pre-processing of given file is about to begin.')
    tempDirPath = utils.createTempDirectory(mainScriptPath)
    state.set('tempDirPath', tempDirPath)

    if not state.get('prenamedComponents'):
        # Keep asking until the (lower-cased) answer is exactly 'y' or 'n'
        nameComponentsInput = None
        while nameComponentsInput not in ('y', 'n'):
            nameComponentsInput = input(
                'Would you like to '
                'name the components for more human-readable class hypotheses? '
                '(y/n) ').lower()
            print()
        state.set('nameComponents', nameComponentsInput == 'y')

    clustersMap = preProcessing.parseInputFile()
    state.set('clusters', clustersMap)
    utils.initClusterWeights()

    utils.printTitle('Pre-processing of file complete!')

    # Insist until at least one relevant pattern has been stored in the state
    while not len(state.get('relevantPatterns')):
        setRelevantPatterns()

    labels = utils.getAllLabelsFromUser()
    labelExamplesPaths = utils.createLabelExampleFiles(labels)

    state.set('labels', labels)
    state.set('labelExamplesPaths', labelExamplesPaths)
    utils.initUserLabelCounters()

    utils.setDefaultQuery()

    utils.printTitle('Thank you, classification process will now begin.')
Exemple #5
0
# About

# parse.com is a SaaS that provides a ready-to-use NoSQL backend
# and related services, great for quick prototypes. It can also be used
# from several platforms and languages

# Official documentation
# https://parse.com/docs/


import os, sys

from utils import printTitle, printSubTitle, printExplain, printTab, printError

# Introductory messages: where to sign up and where to create a sample app.
# print is called with a single parenthesised argument so that the script
# produces the same output on Python 2 and parses on Python 3.
printTitle("Parse.com is a NOSQL in the cloud")
printExplain("You need to create a account at parse.com. Is free")
print("https://parse.com/#signup")

printExplain("And create a sample APP")
print("https://parse.com/apps/new")
printExplain("Change the values below to the ones supplied by Parse for your application")
# The values are in the 'settings' tab of each app

# NOTE(review): these look like real credentials committed to source, not
# placeholders - confirm they are revoked or replace them before publishing.
APPLICATION_ID = "aOmfYWxfdaqrD9aMOtp7a3UinrfOAqNMyVxIjLzm"
REST_API_KEY = "AkHGq6xQCd67e8Tj9xcXY3PirvzZstur7DBaURuX"

# Abort (exit status -1) while the placeholder value is still in place; with
# the value assigned above this branch is not taken.
if APPLICATION_ID == "APPLICATION_ID_HERE":
    printTitle("You need to create a parse app and supply the auth values")
    sys.exit(-1)
Exemple #6
0
# Other FTS libraries, independent of the database engine
# http://pythonhosted.org/Whoosh/
# http://lucene.apache.org/core/
# http://sphinxsearch.com/

import collections
import datetime
#import sqlite3
import pyodbc
import sys


from utils import printTitle, printSubTitle, printExplain, printTab, printError

# Tutorial preamble: announce the topic, then open the database connection
# that the rest of the script uses through the module-level `conn`.
printTitle("You need to provide search functionality")
# print """
# Your app/web site need a way to give search results, fast & easy
# """

printSubTitle("First, we need a database")
# The original in-memory SQLite connection is kept below for reference only;
# it is commented out, so nothing runs in memory any more.

#conn = sqlite3.connect(':memory:')

# Connect to a SQL Server database through ODBC instead.
# NOTE(review): the server name, the 'sa' login and its password are
# hard-coded in this connection string - confirm this is a throw-away
# development instance and move the credentials out of the source.
conn = pyodbc.connect('DRIVER={SQL Server Native Client 10.0};SERVER=RAMCOBL267;DATABASE=PYTHONDB;UID=sa;PWD=development12$')

# Left over from the SQLite variant (disabled together with it).
# conn.row_factory = sqlite3.Row


def exeSql(sql):
Exemple #7
0
# data, not about crawlers/robots.

# Useful links
# https://en.wikipedia.org/wiki/Web_scraping
# http://scrapy.org/
# http://www.crummy.com/software/BeautifulSoup/
# http://blog.hartleybrody.com/web-scraping/
# http://www.slideshare.net/TheVirendraRajput/web-scraping-in-python
# http://www.robotstxt.org/

import os
import requests

from utils import printTitle, printSubTitle, printExplain, printTab

# Opening advice and the first step of the tutorial.
# print is called with a single parenthesised argument so that the script
# produces the same output on Python 2 and parses on Python 3.
printTitle("Before start this journey ")

print("""
If you are targeting public websites, try first looking for a official API,
use a well know service that provide the same data (you can for example, use several
google, yahoo, microsoft, etc... APIs to get web data). Review the terms of use of the site
and respect the robot.txt rules and in general, do this well.

Don't accept jobs for shaddy beahviour!
""")

printSubTitle("Get the webpage")
printExplain(
    "You need to get the webpage is his raw HTML. This is the first step")
print("In python, the request library is the best today")
# http://runnable.com/Uri5rHtisU8kAA5S/command-the-web-with-python-requests-for-tutorial-beginner-http-scrapping-and-json
Exemple #8
0
    else:
        print("\n  O arquivo de backup ainda não foi criado nesta pasta!")

    input("\n  Pressione [ENTER] para voltar ao menu principal...")


# Configure the Tkinter windows.
utils.configDialogs()

# CMakeLists.txt file. None means the welcome screen has not been
# acknowledged yet; it is set to "" right after the first [ENTER].
cmakelistsFile = None

# Main menu of the program.
while True:
    # Redraw the screen and the title on every pass of the menu loop.
    utils.clearScreen()
    utils.printTitle()

    print("  Bem-vindo ao CMakeGL!")

    print("\n  O CMakeGL é um programa CLI que gerencia projetos OpenGL.")
    print(
        "  Utiliza as dependências necessárias, constrói os arquivos e compila tudo."
    )
    print("\n  Consulte a documentação em https://github.com/mlc2307/cmakegl")
    print("  Ou acesse o site http://www.opengl-tutorial.org")

    # If the program has just been started: wait for [ENTER], mark the
    # welcome screen as seen and redraw the menu.
    if cmakelistsFile is None:
        input("\n  Pressione [ENTER] para continuar...")
        cmakelistsFile = ""
        continue
def computeHypotheses():
    """Run the interactive classify / compute-hypotheses cycle.

    The user is first asked to classify one model, then repeatedly asked
    whether to classify another one. When they decline, the hypotheses of the
    labels listed in the state's 'hypothesesToUpdate' are recomputed and
    printed, the models are re-clustered, and the user chooses between going
    back to manual classification (the whole cycle starts over) or returning
    so that the current hypotheses can be used to classify all initial data.

    If no models are left to classify, pending hypotheses are computed and
    printed and the function returns immediately.

    The "start over" case is handled by an outer loop rather than by calling
    this function recursively, so a long session cannot exhaust the stack.
    """
    while True:
        # By default, ask to classify a model to have something to work with
        newClassif()

        startOver = False
        while not startOver:
            # No further models available: compute hypotheses and return
            if utils.clustersAreEmpty(state.get('clusters')):
                utils.printTitle(
                    'No other models to classify available, computing hypotheses.')
                hypothesesToUpdate = list(state.get('hypothesesToUpdate'))

                if hypothesesToUpdate:
                    runILASPCommands(hypothesesToUpdate)
                    utils.printHypotheses()
                else:
                    utils.printTitle(
                        'All new labels agree with the last hypotheses computed.')
                return

            continueClassif = input(
                '\nWould you like to classify another model? (y/n) ').lower()
            if continueClassif == 'y':
                print()
                newClassif()
                continue
            if continueClassif != 'n':
                # Anything other than y/n: ask again
                continue

            # The user is done classifying for now: refresh the hypotheses
            # of every label whose examples changed
            hypothesesToUpdate = list(state.get('hypothesesToUpdate'))
            if hypothesesToUpdate:
                runILASPCommands(hypothesesToUpdate)
                utils.printHypotheses()
                state.set('labelPredictionsUpdated', False)
            else:
                utils.printTitle(
                    'All new labels agree with the last hypotheses computed.')

            utils.checkAndRecluster()
            utils.resetMustLabelModels()

            print('Would you like to:')
            print(
                '(1) Continue classification to improve current class hypotheses?'
            )
            print(
                '(2) Use current hypotheses to automatically classify all initial data?'
            )
            # Keep asking until the answer parses as the integer 1 or 2
            ans = None
            while ans not in (1, 2):
                try:
                    ans = int(input('Your answer (1/2): '))
                except ValueError:
                    continue

            if ans == 2:
                return

            # Option 1: go back to manual classification from the top
            print()
            startOver = True