Example #1
def createCollection(articlesPath=ARTICLES_DIR_NAME):
    if not os.path.exists(articlesPath):
        print('ERROR: cannot find the articles')
        return -1
    normalizedArrayOfDocRes, tfCounter = morph.normalizeArrayOfDoc(
        extractText(articlesPath).split(SPLIT_SENTENCES))
    utils.writeToFile(COLLECTIONS_FILE_NAME, normalizedArrayOfDocRes)
    utils.writeToFile(TF_FILE_NAME, tfCounter)
    return 0
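For context, this snippet leans on module-level constants and helpers (`morph`, `utils`, `extractText`) defined elsewhere in the project. A minimal sketch of compatible definitions; the values below are illustrative assumptions, not the originals:

import os

# illustrative values; the real project defines its own
ARTICLES_DIR_NAME = 'articles'
COLLECTIONS_FILE_NAME = 'collections.txt'
TF_FILE_NAME = 'tf.txt'
SPLIT_SENTENCES = '. '

if __name__ == '__main__':
    status = createCollection()  # 0 on success, -1 if articles are missing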
Example #2
def main(argv):
    team = argv[0] if argv[0] != 'overall' else {'$exists': True}
    teamName = argv[0] if argv[0] != 'overall' else 'overall'

    print('Connecting to Mongo')

    client = pymongo.MongoClient(os.getenv('MONGO_URI'),
                                 ssl_cert_reqs=ssl.CERT_NONE)
    collection = client.epilog.data

    print('fetching metadata')

    doc_count = collection.estimated_document_count()

    print('loading all data from database')

    printProgressBar(0,
                     doc_count,
                     prefix='Progress:',
                     suffix='Complete',
                     length=50)

    data = defaultdict(def_value)

    index = 0
    for doc in collection.find({
            'experimentLabel': team,
            'x': {
                '$exists': True
            },
            'y': {
                '$exists': True
            },
            'z': {
                '$exists': True
            }
    }).sort('time', pymongo.ASCENDING):
        printProgressBar(index,
                         doc_count,
                         prefix='Progress:',
                         suffix='Complete',
                         length=50)
        index += 1

        # count hits per (x, z) coordinate pair; presumably a top-down
        # projection, so the vertical y axis is skipped
        x = int(doc['x'])
        y = int(doc['z'])

        data[x, y] += 1

    data = sorted(data.items(), key=lambda item: item[0])

    print('')

    print('Writing data to file')
    writeToFile(data, teamName, 'csv')

    return 0
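`def_value` is not shown in this snippet; for the `data[x, y] += 1` counting to work, it only needs to return a numeric zero. A minimal compatible helper (an assumption, not the original definition):

from collections import defaultdict

def def_value():
    # default count for a coordinate pair that has not been seen yet
    return 0

data = defaultdict(def_value)
data[3, 7] += 1  # behaves like defaultdict(int) for this use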
Example #3
def ast(root: State) -> None:
    start_time = time()

    path_to_goal: Deque[str] = deque()
    cost_of_path: int = 0
    nodes_expanded: int = 0
    search_depth: int = 0
    max_search_depth: int = 0
    running_time: float = 0
    max_ram_usage: float = 0

    fringe: List[State] = [root]
    heapify(fringe)
    visited: List[State] = []

    while len(fringe) > 0:
        currentState: State = heappop(fringe)
        visited.append(currentState)

        if currentState.isGoal():
            end_time = time()

            while currentState.direction is not None:
                path_to_goal.appendleft(currentState.direction)
                currentState = currentState.parent
            cost_of_path = len(path_to_goal)
            # render the deque as a plain list string, e.g. "['Up', 'Left']"
            path_to_goal = str(list(path_to_goal))
            nodes_expanded = len(visited) - 1
            for state in fringe:
                if state.depth > search_depth:
                    search_depth = state.depth
            max_search_depth = search_depth
            for state in visited:
                if state.depth > max_search_depth:
                    max_search_depth = state.depth
            running_time = end_time - start_time
            max_ram_usage = getrusage(RUSAGE_SELF).ru_maxrss / 1024
            writeToFile(path_to_goal, cost_of_path, nodes_expanded,
                        search_depth, max_search_depth, running_time,
                        max_ram_usage)
            return

        for childState in currentState.getChildren():
            if (childState is not None) and (childState not in visited):
                if childState not in fringe:
                    heappush(fringe, childState)
                else:
                    # keep the cheaper of the two equivalent states and
                    # restore the heap invariant after the in-place update
                    i = fringe.index(childState)
                    if childState < fringe[i]:
                        fringe[i] = childState
                        heapify(fringe)
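`heappush`/`heappop` and the `childState < fringe[i]` comparison require `State` to be orderable. A minimal sketch of the ordering A* needs, assuming `State` tracks a path cost and a heuristic; the class below is illustrative, not the original:

class State:
    GOAL = (1, 2, 3, 4, 5, 6, 7, 8, 0)  # illustrative 8-puzzle goal

    def __init__(self, board, parent=None, direction=None, depth=0):
        self.board = board          # tuple of tile values
        self.parent = parent
        self.direction = direction  # move that produced this state
        self.depth = depth          # g(n): cost from the root

    def heuristic(self):
        # h(n): misplaced-tile count (Manhattan distance is the usual choice)
        return sum(1 for a, b in zip(self.board, self.GOAL)
                   if a != 0 and a != b)

    def __lt__(self, other):
        # heapq pops the state with the smallest f(n) = g(n) + h(n)
        return self.depth + self.heuristic() < other.depth + other.heuristic()

    def __eq__(self, other):
        # `in visited` / `in fringe` compare board configurations
        return self.board == other.board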
Example #4
def main():
    '''Run orangfuzz and randomly generate lines for the desired device.'''
    args = parseArgs()
    if args.seed is None:
        args.seed = int(math.floor(random.random() * math.pow(2, 28)))
    rndObj = random.Random(args.seed)

    orangDevice = Unagi()
    allLines = []

    allLines = prepopulateStart(orangDevice, rndObj, allLines)
    allLines = generateLines(args, orangDevice, rndObj, allLines)

    writeToFile(args, allLines)
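The point of seeding a dedicated `random.Random` instance is reproducibility: re-running orangfuzz with the same seed replays the exact same generated lines, which is what makes a crashing input replayable. A quick illustration:

import random

rnd_a = random.Random(1234)
rnd_b = random.Random(1234)
# identical seeds yield identical pseudo-random streams
assert [rnd_a.random() for _ in range(5)] == [rnd_b.random() for _ in range(5)]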
Example #5
def plot_statistics(statistics_dicts, out_filename):
    tex_str = ''
    offset_strs = list(statistics_dicts.keys())
    offset_strs.sort()
    for offset_str in offset_strs:
        statistics_dict = statistics_dicts[offset_str]
        keys = statistics_dict.keys()

        # all but locally converged (2) and early stopped (5) count as possibly crossed
        lines = ['({}, {})'.format(n_hidden, 1.0 - ((statistics_dict[n_hidden][2] + statistics_dict[n_hidden][5]) / np.sum(statistics_dict[n_hidden])))
                 for n_hidden in np.sort(np.array(list(keys)))]
        tex_str += '\\addplot coordinates {\n' + '\n'.join(lines) + '};\n\\addlegendentry{$\\Delta = ' + offset_str + '$}\n'

    print('LaTeX code excerpt:')
    print(tex_str)

    tex_str = utils.readFromFile('tex_head.txt') + tex_str + utils.readFromFile('tex_tail.txt')
    utils.writeToFile(out_filename, tex_str)
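The string being assembled is a pgfplots `\addplot` block; once `tex_head.txt` and `tex_tail.txt` are wrapped around it, each offset contributes a fragment of roughly this shape (coordinates are illustrative):

\addplot coordinates {
(10, 0.25)
(50, 0.60)
(100, 0.85)
};
\addlegendentry{$\Delta = 0.5$}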
Example #6
def baselinePerformance(labeled_data, k):
    execution_times = []

    # CUSTOM K-MEANS CLUSTERING ALGORITHM
    start_time = time.time()
    custom_clusters = clustering.k_means_clustering(labeled_data, k)
    execution_times.append(time.time() - start_time)

    # SKLEARN K-MEANS CLUSTERING ALGORITHM
    start_time = time.time()
    sklearn_clusters = clustering.sklearnKMeansClustering(labeled_data, k)
    execution_times.append(time.time() - start_time)

    # SKLEARN AGGLOMERATIVE CLUSTERING ALGORITHM
    start_time = time.time()
    agglomerative_clusters = clustering.sklearnAgglomerativeClustering(labeled_data, k)
    execution_times.append(time.time() - start_time)

    utils.writeToFile(config.CUSTOM_KMEANS_OUTPUT, custom_clusters)
    utils.writeToFile(config.SKLEARN_KMEANS_OUTPUT, sklearn_clusters)
    utils.writeToFile(config.AGGLOMERATIVE_KMEANS_OUTPUT, agglomerative_clusters)

    if config.DEBUG:
        print("custom_clusters:\n", custom_clusters)
        print("sklearn_clusters:\n", sklearn_clusters)
        print("agglomerative_clusters:\n", agglomerative_clusters)

    # execution_times = [time1, time2, time3]; time1 = custom k-means,
    # time2 = sklearn k-means, time3 = sklearn agglomerative
    return [custom_clusters, sklearn_clusters, agglomerative_clusters], execution_times
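A sketch of how a caller unpacks the return value, matching the order noted in the comment above (`labeled_data` and `k` are assumed to come from the surrounding pipeline):

clusters, execution_times = baselinePerformance(labeled_data, k)
custom_clusters, sklearn_clusters, agglomerative_clusters = clusters
# execution_times[0] = custom k-means, [1] = sklearn k-means,
# [2] = sklearn agglomerative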
Example #7
def main():
    execution_times = []
    file_names = utils.loadFileNames()
    mfccs = loadMFCCSValues(file_names)
    labeled_data = utils.createLabeledData(file_names, mfccs)

    # Run the sklearn DBSCAN algorithm with default parameters; everything is
    # classified as noise, so no k value is derived from this run
    dbscan_clusters = clustering.sklearnDBSCAN(labeled_data, 0.5)
    utils.writeToFile(config.DBSCAN_KMEANS_OUTPUT_DEFAULT_PARAMETERS, dbscan_clusters)

    # Run the sklearn DBSCAN algorithm with EPS = config.DBSCAN_EPS, write the
    # clusters to file, and derive the k value from them
    start_time = time.time()
    dbscan_clusters = clustering.sklearnDBSCAN(labeled_data, config.DBSCAN_EPS)
    execution_times.append(time.time() - start_time)
    utils.writeToFile(config.DBSCAN_KMEANS_OUTPUT_EPS, dbscan_clusters)
    k = len(dbscan_clusters)

    results = baselinePerformance(labeled_data, k)
    execution_times += results[1]
    execution_times = ["%.4f" % member for member in execution_times]

    custom_clusters = results[0][0]
    sklearn_clusters = results[0][1]
    agglomerative_clusters = results[0][2]
    custom_clusters = sorted(custom_clusters)
    sklearn_clusters = sorted(sklearn_clusters)
    agglomerative_clusters = sorted(agglomerative_clusters)
    dbscan_clusters = sorted(dbscan_clusters)

    if config.VERBOSE:
        print("dbscan_clusters:\n", dbscan_clusters)
        print("custom_clusters:\n", custom_clusters)
        print("sklearn_clusters:\n", sklearn_clusters)
        print("agglomerative_clusters:\n", agglomerative_clusters)
        print("execution_times:\n", execution_times)

    goodPerformance(file_names, dbscan_clusters, custom_clusters, sklearn_clusters, agglomerative_clusters)
Example #8
def main(argv):
    team = argv[0]

    print('Connecting to Mongo')

    client = pymongo.MongoClient(os.getenv('MONGO_URI'),
                                 ssl_cert_reqs=ssl.CERT_NONE)
    collection = client.epilog.data

    print('fetching metadata')

    mongo_filter = {'experimentLabel': team, 'event': 'PlayerLocationEvent'}
    # mongo_filter = {'event': 'PlayerLocationEvent'}

    # `time_offset` is presumably a module-level flag that re-bases
    # timestamps so the earliest event starts at t=0
    for first_doc in collection.find(mongo_filter).sort(
            'time', pymongo.ASCENDING).limit(1):
        _time = math.floor(first_doc['time'] / 1000)
        if time_offset:
            _time_offset = _time
            min_time = 0
        else:
            min_time = _time
    for last_doc in collection.find(mongo_filter).sort(
            'time', pymongo.DESCENDING).limit(1):
        if time_offset:
            max_time = math.floor(last_doc['time'] / 1000) - _time_offset
        else:
            max_time = math.floor(last_doc['time'] / 1000)

    doc_count = collection.estimated_document_count()

    print('loading all data from database')

    # printProgressBar(0, doc_count, prefix='Progress:',
    #                  suffix='Complete', length=50)

    data = dict()
    data['time'] = {'min': min_time, 'max': max_time}
    data['timeline'] = dict()

    index = 0
    for doc in collection.find(mongo_filter).sort('time', pymongo.ASCENDING):
        printProgressBar(index,
                         doc_count,
                         prefix='Progress:',
                         suffix='Complete',
                         length=50)
        index += 1

        if doc['event'] != "PlayerLocationEvent":
            continue

        doc['_id'] = str(doc['_id'])

        if time_offset:
            time_key = str(math.floor(doc['time'] / 1000) - _time_offset)
            doc['time'] = doc['time'] - (_time_offset * 1000)
        else:
            time_key = str(math.floor(doc['time'] / 1000))

        if time_key in data['timeline']:
            data['timeline'][time_key].append(doc)
        else:
            data['timeline'][time_key] = [doc]
    print('')

    print('Writing data to file')
    writeToFile(data, team, 'json')

    return 0
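For reference, the dictionary handed to `writeToFile` has roughly this shape, with one bucket of event documents per elapsed second (values are illustrative):

data = {
    'time': {'min': 0, 'max': 3600},
    'timeline': {
        '0': [{'_id': '...', 'event': 'PlayerLocationEvent',
               'x': 1.5, 'y': 64.0, 'z': -2.0, 'time': 250}],
        # one list of PlayerLocationEvent documents per second key
    }
}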
Example #9
def main():

    global args, max_length
    args = parser.parse_args()

    if args.eval:

        if not os.path.exists(args.output_dir):
            print("Output directory does not exist")
            exit(1)
        try:
            model = EncoderDecoder().load(args.output_dir)
            print("Model loaded successfully")
        except Exception:
            print("The trained model could not be loaded...")
            exit(1)

        test_pairs = readFile(args.test_file)

        outputs = model.evaluatePairs(test_pairs, rand=False, char=args.char)
        writeToFile(outputs, os.path.join(args.output_dir, "output.pkl"))
        reference = []
        hypothesis = []

        for (hyp, ref) in outputs:
            if args.char or args.char_bleu:
                reference.append([list(ref)])
                hypothesis.append(list(hyp))
            else:
                reference.append([ref.split(" ")])
                hypothesis.append(hyp.split(" "))

        bleu_score = compute_bleu(reference, hypothesis)
        print("Bleu Score: " + str(bleu_score))

        print(
            model.evaluateAndShowAttention(
                "L'anglais n'est pas facile pour nous.", char=args.char))
        print(
            model.evaluateAndShowAttention(
                "J'ai dit que l'anglais est facile.", char=args.char))
        print(
            model.evaluateAndShowAttention(
                "Je n'ai pas dit que l'anglais est une langue facile.",
                char=args.char))
        print(
            model.evaluateAndShowAttention("Je fais un blocage sur l'anglais.",
                                           char=args.char))

    else:
        input_lang, output_lang, pairs = prepareData(args.train_file)

        print(random.choice(pairs))

        if args.char:
            model = EncoderDecoder(args.hidden_size, input_lang.n_chars,
                                   output_lang.n_chars, args.drop, args.tfr,
                                   args.max_length, args.lr, args.simple,
                                   args.bidirectional, args.dot, False, 1)
        else:
            model = EncoderDecoder(args.hidden_size, input_lang.n_words,
                                   output_lang.n_words, args.drop, args.tfr,
                                   args.max_length, args.lr, args.simple,
                                   args.bidirectional, args.dot, args.multi,
                                   args.num_layers)

        model.trainIters(pairs,
                         input_lang,
                         output_lang,
                         args.n_iters,
                         print_every=args.print_every,
                         plot_every=args.plot_every,
                         char=args.char)
        model.save(args.output_dir)
        model.evaluatePairs(pairs, char=args.char)
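The reference/hypothesis nesting built in the eval branch follows the usual corpus-BLEU convention, one list of reference token lists per hypothesis; assuming `compute_bleu` expects exactly that, a small illustration (tokens made up):

reference = [[['the', 'cat', 'sits']], [['hello', 'world']]]  # per sample: a list of references
hypothesis = [['the', 'cat', 'sat'], ['hello', 'world']]      # per sample: one token list
bleu_score = compute_bleu(reference, hypothesis)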