Example #1
    def save_model(self):

        print('Saving DetectorCNN model definition to file:',
              self.model_definition)

        model_dict = {
            'batch_size': self.batch_size,
            'learning_rate': self.learning_rate,
            'regularization_rate': self.regularization_rate,
            'dropout_rate': self.dropout_rate,
            'model_definition': self.model_definition,
            'model_weights': self.model_weights,
            'model_tb_log': self.tb_log_dir,
            # Hard-coded input normalization statistics for this dataset
            'X_mean': 117.18408,
            'X_std': 27.43093
        }

        utils.ensure_directory(self.MODEL_DIR)
        utils.serialize(model_dict, self.model_definition)

        return
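For context, a model saved this way would be restored by reading the same dictionary back. Below is a hedged sketch of a counterpart loader, assuming utils.deserialize() is the inverse of the utils.serialize() call above; both the method name and that helper are assumptions, not part of the original class.

    def load_model(self):
        # Sketch only: restore the fields written by save_model() above,
        # assuming utils.deserialize() reads back the serialized dict.
        model_dict = utils.deserialize(self.model_definition)
        self.batch_size = model_dict['batch_size']
        self.learning_rate = model_dict['learning_rate']
        self.regularization_rate = model_dict['regularization_rate']
        self.dropout_rate = model_dict['dropout_rate']
        self.model_weights = model_dict['model_weights']
        self.tb_log_dir = model_dict['model_tb_log']
        return model_dict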
Example #2
def db_aggregate(**kwargs):
    check_cache_age()
    global cache_aggregate, cache_aggregate_permanent
    key = utilities.serialize(kwargs)
    if "timerange" in kwargs and not kwargs["timerange"].active():
        if key in cache_aggregate_permanent:
            return copy.copy(cache_aggregate_permanent.get(key))
        result = db_aggregate_full(**kwargs)
        cache_aggregate_permanent.add(key, copy.copy(result))
    else:
        if key in cache_aggregate:
            return copy.copy(cache_aggregate[key])
        result = db_aggregate_full(**kwargs)
        cache_aggregate[key] = copy.copy(result)

    return result
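db_aggregate() assumes a permanent cache object exposing add(), get(), and membership tests, while the non-permanent cache is a plain dict. The following minimal sketch only makes that assumed interface concrete; the project's own cache class is not shown here.

class PermanentCache:
    def __init__(self):
        self.cache = {}

    def add(self, key, value):
        # Store the entry as-is; callers already copy.copy() their values.
        self.cache[key] = value

    def get(self, key):
        return self.cache.get(key)

    def __contains__(self, key):
        return key in self.cache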
Example #3
    def authNewClient(self, clientFD):
        authMessage = clientFD.recv(1024)               # Receive the client's auth message

        self.mutex.acquire()                            # Lock Access to DB
        serverAuth = authentication.ServerAuth()        # Create auth instance
        result = serverAuth.authenticate(authMessage)   # Authenticate
        self.mutex.release()                            # Unlock Access to DB
        status = result["status"]

        self.send(clientFD, Utilities.serialize(result))

        if not status:
            self.closeConnection()
            Utilities.logger("Authentication failed")

        return status
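Since authenticate() runs between acquire() and release(), an exception inside it would leave the lock held. Assuming self.mutex is a threading.Lock (or compatible), the same critical section could be written with a context manager so the lock is always released; this is a sketch, not the original code.

        with self.mutex:                                # Lock released even on exceptions
            serverAuth = authentication.ServerAuth()
            result = serverAuth.authenticate(authMessage)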
Example #4
def db_query(**kwargs):
    check_cache_age()
    global cache_query, cache_query_permanent
    key = utilities.serialize(kwargs)
    if "timerange" in kwargs and not kwargs["timerange"].active():
        if key in cache_query_permanent:
            #print("Hit")
            return copy.copy(cache_query_permanent.get(key))
        #print("Miss")
        result = db_query_full(**kwargs)
        cache_query_permanent.add(key, copy.copy(result))
        #print(cache_query_permanent.cache)
    else:
        #print("I guess they never miss huh")
        if key in cache_query:
            return copy.copy(cache_query[key])
        result = db_query_full(**kwargs)
        cache_query[key] = copy.copy(result)

    return result
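Both db_aggregate() and db_query() key their caches on utilities.serialize(kwargs), so that serialization must be deterministic for logically equal calls. As an illustration only (not the project's implementation), here is one way such a key could be built, assuming the argument values are JSON-serializable or at least have stable str() representations.

import json

def serialize_key(kwargs):
    # Hypothetical key builder: sort_keys makes the key independent of argument
    # order; default=str handles values (e.g. timerange objects) JSON cannot encode.
    return json.dumps(kwargs, sort_keys=True, default=str)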
Example #5
    def getStreamFromJob(self, job):
        return pickle.dumps(serialize(job))
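Presumably the receiving side rebuilds the job from this byte stream. A hedged sketch of the inverse follows, assuming the stream is exactly what pickle.dumps() produced above and that serialize(job) yields a plain, picklable dict; the method name is hypothetical.

    def getJobFromStream(self, stream):
        # Hypothetical inverse of getStreamFromJob(): unpickle the job dict.
        return pickle.loads(stream)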
Example #6
def index(config):
    analyzer = createAnalyzer(config["analyzer-options"])
    mappedSentences = loadDocs(
        config["docs-file"])  # TODO implement loadDocs()
    w2pBigramsModelLocation = config["w2p-model"]["W2P-model-file-bigrams"]
    w2pTrigramsModelLocation = config["w2p-model"]["W2P-model-file-trigrams"]

    listOfValues = []
    for docId, sentences in mappedSentences.items():
        for sentence in sentences:
            for entry in analyzer.analyze(docId, sentence):
                listOfValues.append(entry)

    indexDict = {}    # term -> {"data": [{"doc-id": ..., "tf": ...}, ...]}
    docSizeDict = {}  # doc-id -> number of analyzed entries for that document

    for obj in listOfValues:

        if obj.getDataPoint() not in docSizeDict:
            docSizeDict[obj.getDataPoint()] = 1
        else:
            docSizeDict[obj.getDataPoint()] += 1

        indexTerm = obj.getForm().lower()
        if indexTerm not in indexDict:
            # First occurrence of this term: start its posting list
            indexDict[indexTerm] = {
                "data": [{"doc-id": obj.getDataPoint(), "tf": 1}]
            }
        else:
            # Term already indexed: bump tf for this doc or add a new posting
            postings = indexDict[indexTerm]["data"]
            for docs in postings:
                if docs["doc-id"] == obj.getDataPoint():
                    docs["tf"] += 1
                    break
            else:
                postings.append({"doc-id": obj.getDataPoint(), "tf": 1})

    totalDocsSize = len(docSizeDict)
    modifiedIndexDict = indexDict.copy()
    avrgD = len(listOfValues) / totalDocsSize

    import math
    counter = 0
    for word, entry in indexDict.items():
        # idf from the number of documents containing the term
        locatedInFileCount = len(entry["data"])
        idfValue = math.log(totalDocsSize / (locatedInFileCount + 1), 10)
        # NOTE: modifiedIndexDict is a shallow copy of indexDict, so these
        # fields also show up in indexDict (and in the JSON dump below).
        modifiedIndexDict[word]["idf"] = idfValue
        modifiedIndexDict[word]["id"] = counter
        counter += 1
    index = Index(indexDict, docSizeDict, avrgD)

    newIndexedJsonText = json.dumps(indexDict, indent=4)
    jsonPath = Path(config["engine-options"]["index-json"])
    os.makedirs(str(jsonPath), exist_ok=True)
    with open(os.path.join(str(jsonPath), "INDEX.JSON"), 'w') as outfile:
        outfile.write(newIndexedJsonText)

    outputIndexPath = config["engine-options"]["index-file"]
    picklePath = Path(outputIndexPath)
    os.makedirs(str(picklePath), exist_ok=True)
    serialize(index, os.path.join(str(picklePath), "PICKLE.PKL"))
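The "idf" values stored per term are normally combined with each posting's "tf" to rank documents. Below is a small, hedged sketch of such a scorer, relying only on the index structure built by index() above; this helper is illustrative and not part of the original project.

def tfidf_scores(indexDict, term):
    # Return {doc-id: tf * idf} for one term, using the posting-list layout
    # produced by index(); unknown terms yield an empty dict.
    entry = indexDict.get(term.lower())
    if entry is None:
        return {}
    idf = entry.get("idf", 0.0)
    return {posting["doc-id"]: posting["tf"] * idf for posting in entry["data"]}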
Example #7
    def getStreamFromTree(self, tree):
        return repr(serialize(tree))
Example #8
    def _getDictFromJob(self, job):
        return serialize(job)
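Examples 5 to 8 and the training script below each call a project-specific serialize() helper, and the signatures clearly differ: some take only the object and return a dict, Example 6 passes the object then an output path, and the script below passes the path first. Purely as an illustration of one common shape, here is a hedged pickle-based sketch taking (obj, path); none of these projects necessarily implement it this way.

import pickle

def serialize(obj, path):
    # Hypothetical helper: pickle obj to a file at path.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def deserialize(path):
    # Hypothetical inverse: load the pickled object back from path.
    with open(path, 'rb') as f:
        return pickle.load(f)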
Example #9
# Get 16 random values from 0 to 100 without replacement
valid_examples = np.random.choice(valid_window, valid_size, replace=False)

if TO_SERIALIZE:

    raw_data = read_data(TRAIN_DIR, nfiles=c_nfiles)

    # Build the dataset and the dictionaries from the raw data
    data, dictionary, reverse_dictionary = build_dataset(
        raw_data, VOCABULARY_SIZE)

    del raw_data  # To reduce memory.

    # To avoid reading the whole dataset again and again
    print("Serializing the data.")
    serialize(DATA_DICT_SERIALIZATION, (data, dictionary, reverse_dictionary))
else:
    print("Reading serialization: ")
    data, dictionary, reverse_dictionary = deserialize(DATA_DICT_SERIALIZATION)

# Store some information about the current test configuration
configuration = (f"BATCH_SIZE: {BATCH_SIZE} EMBEDDING_SIZE: {EMBEDDING_SIZE} "
                 f"WINDOW_SIZE: {WINDOW_SIZE} VOCABULARY_SIZE: {VOCABULARY_SIZE} "
                 f"nfiles: {c_nfiles} stopwd: {c_stopwd} "
                 f"shuffle_docs: {c_shuffle_docs} dataset_size: {len(data)}")

print("CONFIG: " + configuration)

# Read the question file for the Analogical Reasoning evaluation
questions = read_analogies(ANALOGIES_FILE, dictionary)

# ------------------------------------------ MODEL DEFINITION --------------------------------------------------------