def classify_batch(text_list, glove_array=None, glove_dict=None):
    """Classify a batch of review texts into star ratings 1-5.

    Pads the batch up to BATCH_SIZE with empty strings, restores the most
    recent checkpoint, and evaluates the saved graph's 'prediction' tensor.

    Args:
        text_list: list of raw review strings (length <= BATCH_SIZE).
        glove_array, glove_dict: optional pre-loaded GloVe embeddings;
            loaded on demand when either is omitted.

    Returns:
        List of predicted ratings (ints 1-5), one per original input text.
    """
    original_length = len(text_list)
    # Pad a copy so the caller's list is not mutated as a side effect.
    padded = list(text_list)
    while len(padded) < BATCH_SIZE:
        padded.append("")
    # 'is None' is required here: '== None' on a numpy embedding array is
    # an elementwise comparison and makes the 'if' raise a ValueError.
    if glove_array is None or glove_dict is None:
        glove_array, glove_dict = load_glove_embeddings()
    data_text = [imp.preprocess(review) for review in padded]
    test_data = embedd_data(data_text, glove_array, glove_dict)
    sess = tf.InteractiveSession()
    last_check = tf.train.latest_checkpoint(dir_path + '/checkpoints')
    saver = tf.train.import_meta_graph(last_check + ".meta")
    saver.restore(sess, last_check)
    graph = tf.get_default_graph()
    prediction = graph.get_tensor_by_name('prediction:0')
    input_data = graph.get_tensor_by_name('input_data:0')
    Y = sess.run([prediction], {input_data: test_data})
    sess.close()  # release the session's resources once we have the output
    results = []
    # Argmax over the 5 rating scores; +1 maps index 0-4 to rating 1-5.
    # Only the first original_length rows are real inputs (rest is padding).
    for i in range(original_length):
        scores = Y[0][i]
        best = max(range(5), key=lambda j: scores[j])
        results.append(best + 1)
    print(results)
    return results
def load_data(path=dir_path + '/data/train'):
    """Load and preprocess rating review files from subfolders 1-5.

    Globs every file under <path>/1 .. <path>/5 (resolved relative to this
    module's directory) and applies imp.preprocess to each review's text.

    RETURN: List of strings where each element is a preprocessed review.
    """
    print("Loading Review Data...")
    base_dir = os.path.dirname(__file__)  # renamed: 'dir' shadowed the builtin
    file_list = []
    # One subdirectory per star rating, 1 through 5.
    for rating in range(1, 6):
        file_list.extend(
            glob.glob(os.path.join(base_dir, path + '/%d/*' % rating)))
    print("Parsing %s files" % len(file_list))
    data = []
    for f in file_list:  # enumerate() index was unused; dropped
        with open(f, "r") as openf:
            s = openf.read()
            data.append(imp.preprocess(
                s))  # NOTE: Preprocessing code called here on all reviews
    return data
def load_data(path='./data/train'):
    """
    Load raw reviews from text files, and apply preprocessing
    Append positive reviews first, and negative reviews second
    RETURN: List of strings where each element is a preprocessed review.
    """
    print("Loading IMDB Data...")
    base_dir = os.path.dirname(__file__)  # renamed: 'dir' shadowed the builtin
    # Positive reviews first, then negative, per the contract above.
    file_list = glob.glob(os.path.join(base_dir, path + '/pos/*'))
    file_list.extend(glob.glob(os.path.join(base_dir, path + '/neg/*')))
    print("Parsing %s files" % len(file_list))
    data = []
    for f in file_list:  # enumerate() index was unused; dropped
        with open(f, "r") as openf:
            s = openf.read()
            # NOTE: Preprocessing code called here on all reviews
            data.append(imp.preprocess(s))
    return data
def load_zip(name='data.zip', dataset='train'):
    """
    Load raw reviews straight out of a zip archive and apply preprocessing.

    Args:
        name: path of the zip file to read.
        dataset: which split to load ('train', ...); matched against the
            second path component of each archive entry.

    RETURN: List of strings where each element is a preprocessed review.
    """
    print("Loading IMDB Data...")
    data = []
    with zp.ZipFile(name) as data_zip:
        for path in data_zip.namelist():
            parts = path.split('/')
            # len(parts) > 1 guards flat entries that have no split
            # component (previously an IndexError); a trailing '' last
            # component marks a directory entry, which has no content.
            if len(parts) > 1 and parts[1] == dataset and parts[-1] != '':
                # 'path' is already exactly '/'.join(parts); use it directly.
                with data_zip.open(path) as f:
                    s = f.read()
                    data.append(imp.preprocess(s.decode()))
    return data
def classifyAll():
    """Classify every unrated Rating (value == 0) and persist the result.

    Pads the pending work to a multiple of 100 (the model's batch size),
    restores the latest checkpoint, runs the saved graph batch by batch,
    and writes each predicted rating (1-5) back to its Rating row.
    Padding entries (None ratings / "" texts) are never saved.
    """
    allRatings = Rating.objects.all()
    ratings_to_classify = [r for r in allRatings if int(r.value) == 0]
    texts_to_classify = [r.notes for r in ratings_to_classify]
    # Pad both lists to a multiple of the batch size; None marks padding
    # rows that must be skipped when saving predictions back.
    while len(texts_to_classify) % 100 != 0:
        texts_to_classify.append("")
    while len(ratings_to_classify) % 100 != 0:
        ratings_to_classify.append(None)
    print(len(texts_to_classify))
    num_batches = len(texts_to_classify) // 100
    data_text = [imp.preprocess(review) for review in texts_to_classify]
    glove_array, glove_dict = load_glove_embeddings()
    test_data = embedd_data(data_text, glove_array, glove_dict)
    sess = tf.InteractiveSession()
    last_check = tf.train.latest_checkpoint('./checkpoints')
    saver = tf.train.import_meta_graph(last_check + ".meta")
    saver.restore(sess, last_check)
    graph = tf.get_default_graph()
    prediction = graph.get_tensor_by_name('prediction:0')
    input_data = graph.get_tensor_by_name('input_data:0')
    # Distinct loop variables: the original reused 'i' for the batch loop
    # AND both inner loops, which was fragile and hard to read.
    for batch in range(num_batches):
        start = batch * 100
        end = start + 100
        ratings_batch = ratings_to_classify[start:end]
        Y = sess.run([prediction], {input_data: test_data[start:end]})
        # Argmax over the 5 rating scores; +1 maps index 0-4 to rating 1-5.
        predicts = []
        for row in range(100):
            scores = Y[0][row]
            best = max(range(5), key=lambda j: scores[j])
            predicts.append(best + 1)
        print(predicts)
        for rating, classification in zip(ratings_batch, predicts):
            # 'is None' instead of '== None': model instances may define
            # custom equality, and identity is what we mean here.
            if rating is None:  # padding entry, nothing to save
                continue
            rating.value = classification
            rating.save()
    sess.close()  # release the session's resources when all batches are done
def main():
    """Run preprocessing on the module-level `prostr` text and print it."""
    cleaned = preprocess(prostr)
    print(cleaned)