Code example #1
def classify_batch(text_list, glove_array=None, glove_dict=None):
    # Copy first so padding does not mutate the caller's list.
    text_list = list(text_list)
    original_length = len(text_list)
    while len(text_list) < BATCH_SIZE:
        text_list.append("")

    # Use "is None": comparing a NumPy array to None with == is ambiguous.
    if glove_array is None or glove_dict is None:
        glove_array, glove_dict = load_glove_embeddings()
    data_text = [imp.preprocess(review) for review in text_list]
    test_data = embedd_data(data_text, glove_array, glove_dict)

    # Restore the latest TF1-style checkpoint and fetch tensors by name.
    sess = tf.InteractiveSession()
    last_check = tf.train.latest_checkpoint(dir_path + '/checkpoints')
    saver = tf.train.import_meta_graph(last_check + ".meta")
    saver.restore(sess, last_check)
    graph = tf.get_default_graph()

    prediction = graph.get_tensor_by_name('prediction:0')
    input_data = graph.get_tensor_by_name('input_data:0')

    Y = sess.run([prediction], {input_data: test_data})

    # Pick the highest-scoring of the 5 rating classes (1-based) for each
    # review, skipping the padding rows beyond original_length.
    results = []
    for i in range(original_length):
        maxValue = float('-inf')
        index = 0
        for j in range(5):
            if maxValue < Y[0][i][j]:
                maxValue = Y[0][i][j]
                index = j + 1
        results.append(index)

    print(results)
    return results
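
The nested loop above is a hand-rolled argmax; with NumPy (already a TensorFlow dependency) it collapses to one call. A minimal sketch, assuming Y[0] is the (BATCH_SIZE, 5) score array returned by sess.run:

import numpy as np

# Equivalent to the loop above: argmax over the 5 classes for the real
# (non-padding) rows, shifted to 1-based ratings.
results = (np.argmax(Y[0][:original_length], axis=1) + 1).tolist()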
Code example #2
def load_data(path=dir_path + '/data/train'):
    print("Loading Review Data...")
    data = []

    base_dir = os.path.dirname(__file__)  # renamed from "dir" to avoid shadowing the builtin
    file_list = []
    for rating in range(1, 6):  # one subdirectory per 1-5 star rating
        file_list.extend(glob.glob(os.path.join(base_dir, path, str(rating), '*')))
    print("Parsing %s files" % len(file_list))
    for f in file_list:  # the enumerate index was unused
        with open(f, "r") as openf:
            s = openf.read()
            data.append(imp.preprocess(s))  # NOTE: Preprocessing code called here on all reviews
    return data
Code example #3
def load_data(path='./data/train'):
    """
    Load raw reviews from text files, and apply preprocessing.
    Append positive reviews first, and negative reviews second.
    RETURN: List of strings where each element is a preprocessed review.
    """
    print("Loading IMDB Data...")
    data = []

    base_dir = os.path.dirname(__file__)  # renamed from "dir" to avoid shadowing the builtin
    file_list = glob.glob(os.path.join(base_dir, path, 'pos', '*'))
    file_list.extend(glob.glob(os.path.join(base_dir, path, 'neg', '*')))
    print("Parsing %s files" % len(file_list))
    for f in file_list:  # the enumerate index was unused
        with open(f, "r") as openf:
            s = openf.read()
            data.append(imp.preprocess(s))  # NOTE: Preprocessing code called here on all reviews
    return data
Code example #4
def load_zip(name='data.zip', dataset='train'):
    """
    Load raw reviews from text files, and apply preprocessing.
    Append positive reviews first, and negative reviews second.
    RETURN: List of strings where each element is a preprocessed review.
    """
    print("Loading IMDB Data...")
    data = []

    with zp.ZipFile(name) as data_zip:
        for path in data_zip.namelist():
            path_split = path.split('/')
            # Keep only real files (not directory entries) under the requested split.
            if path_split[1] == dataset and path_split[-1] != '':
                with data_zip.open(path) as f:  # '/'.join(path_split) is just path
                    s = f.read()
                    data.append(imp.preprocess(s.decode()))
    return data
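
A hedged usage sketch: the path_split[1] check implies archive entries laid out like data/<split>/<class>/<file>, with the dataset split as the second path component (the exact layout is an assumption, not shown in the source):

# Hypothetical usage, assuming the archive layout described above.
train_reviews = load_zip('data.zip', dataset='train')
test_reviews = load_zip('data.zip', dataset='test')
print(len(train_reviews), len(test_reviews))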
Code example #5
def classifyAll():
    allRatings = Rating.objects.all()

    # Only classify ratings that are still unset (value == 0).
    allRatingsToClassify = [r for r in allRatings if int(r.value) == 0]
    allTextsToClassify = [r.notes for r in allRatingsToClassify]

    # Pad both lists to a multiple of the batch size (100) so every batch is full.
    while len(allTextsToClassify) % 100 != 0:
        allTextsToClassify.append("")

    while len(allRatingsToClassify) % 100 != 0:
        allRatingsToClassify.append(None)

    print(len(allTextsToClassify))

    texts_length = len(allTextsToClassify)
    numBatches = texts_length // 100

    data_text = [imp.preprocess(review) for review in allTextsToClassify]

    glove_array, glove_dict = load_glove_embeddings()
    test_data = embedd_data(data_text, glove_array, glove_dict)

    sess = tf.InteractiveSession()

    # Restore the latest TF1-style checkpoint and fetch tensors by name.
    last_check = tf.train.latest_checkpoint('./checkpoints')
    saver = tf.train.import_meta_graph(last_check + ".meta")
    saver.restore(sess, last_check)
    graph = tf.get_default_graph()

    prediction = graph.get_tensor_by_name('prediction:0')
    input_data = graph.get_tensor_by_name('input_data:0')

    for batch in range(numBatches):  # renamed from "i", which the inner loops shadowed
        startIndex = batch * 100
        endIndex = (batch + 1) * 100

        ratingsBatch = allRatingsToClassify[startIndex:endIndex]
        inputBatch = test_data[startIndex:endIndex]

        Y = sess.run([prediction], {input_data: inputBatch})

        # Pick the highest-scoring of the 5 rating classes (1-based) per row.
        predicts = []
        for i in range(100):
            maxValue = float('-inf')
            index = 0
            for j in range(5):
                if maxValue < Y[0][i][j]:
                    maxValue = Y[0][i][j]
                    index = j + 1
            predicts.append(index)
        print(predicts)

        # Write each prediction back to its Rating row, skipping padding entries.
        for i in range(100):
            rating = ratingsBatch[i]
            if rating is None:  # use "is None", not "== None"
                continue
            rating.value = predicts[i]
            rating.save()
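
The Rating.objects.all() / .save() calls look like the Django ORM (an inference, not stated in the source). If so, the per-row saves in each batch could be collapsed into a single query with bulk_update (Django 2.2+); note bulk_update does not call save() or send save signals. A sketch:

# Hypothetical replacement for the write-back loop, assuming Rating is a Django model.
updated = []
for rating, value in zip(ratingsBatch, predicts):
    if rating is not None:  # skip padding entries
        rating.value = value
        updated.append(rating)
Rating.objects.bulk_update(updated, ['value'])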
Code example #6
def main():
    # "prostr" is not defined in this snippet; it is assumed to be a raw
    # review string provided elsewhere in the module.
    processed_review = preprocess(prostr)
    print(processed_review)
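
A hypothetical driver for this entry point; the review string below is an invented placeholder, since the source never shows how prostr is defined:

# Hypothetical placeholder value for "prostr" (not from the source).
prostr = "This movie was surprisingly good!"

if __name__ == '__main__':
    main()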