Esempio n. 1
0
    def __init__(self, corpus_file_path):
        """Prepare the posting queues, posting files, parser, reader and dictionaries.

        A queue is registered for every two-letter prefix ('aa'..'zz'), every
        two-digit prefix ('00'..'99') and for the catch-all key 'other'.

        :param corpus_file_path: value forwarded to the ReadFile constructor.
        """
        letters = [chr(ord('a') + offset) for offset in range(0, 26)]
        digits = ['%d' % number for number in range(0, 10)]

        # Prefixes never mix letters and digits, so each alphabet is paired
        # only with itself.
        self.name_of_files = {}
        for alphabet in (letters, digits):
            for first in alphabet:
                for second in alphabet:
                    self.name_of_files[first + second] = Queue.Queue()
        self.name_of_files['other'] = Queue.Queue()

        # Posting files use the txt extension
        self.file_type = 'txt'

        # The posting files are created before the parser and reader exist
        self.create_initial_posting_file()

        self.parser = Parser()
        self.read_file = ReadFile(corpus_file_path)

        # Term dictionary and its companion position table start out empty
        self.main_dictionary = {}
        self.temp_position_dic = {}
Esempio n. 2
0
def start_indexing(dirs_list, dirs_dicts, main_path, posting_path, to_stem,
                   start_index, end_index, directory):
    """Parse and index the corpus directories ``dirs_list[start_index:end_index]``.

    The outcome is stored in ``dirs_dicts[directory]`` as the list
    ``[post_files_lines, terms_dict, documents, languages]``; the slot holds
    ``None`` while the work is in progress.

    :param dirs_list: names of the directories found under the corpus folder.
    :param dirs_dicts: mapping that receives this worker's result.
    :param main_path: root path; the corpus is read from ``main_path + '\\corpus'``.
    :param posting_path: prefix of the directory where postings are written.
    :param to_stem: when truthy, stemming is switched on for parser and indexer.
    :param start_index: first index of ``dirs_list`` handled here (inclusive).
    :param end_index: index at which this worker stops (exclusive).
    :param directory: name appended to ``posting_path`` and used as result key.
    """
    dirs_dicts[directory] = None
    reader = ReadFile()
    parser = Parse(main_path)
    target_dir = posting_path + directory
    indexer = Indexer(target_dir)

    if to_stem:
        parser.to_stem = True
        indexer.to_stem = True
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    documents = {}
    last_dir_index = end_index - 1
    for dir_index in range(start_index, end_index):
        docs = reader.separate_docs_in_file(main_path + '\\corpus',
                                            dirs_list[dir_index])
        for doc_position, doc_id in enumerate(docs):
            document = docs[doc_id]
            doc_dict = parser.main_parser(document.text, document)
            # The raw text is dropped once it has been parsed
            document.text = None
            # Flag the indexer right before it receives the very last document
            if dir_index == last_dir_index and doc_position == len(docs) - 1:
                indexer.finished_parse = True
            indexer.index_terms(doc_dict, doc_id)
            documents[doc_id] = document
    dirs_dicts[directory] = [
        indexer.post_files_lines, indexer.terms_dict, documents,
        reader.languages
    ]
 def __init__(self, dirName, stopWordsDict=None):
     """Read every file in ``dirName`` and feed its words to ``_populateWordsDict``.

     :param dirName: directory whose entries are read one by one.
     :param stopWordsDict: optional object stored as ``self.stopWordsDict``.
     """
     self.dirName = dirName
     self.stopWordsDict = stopWordsDict
     self.wordsDict = {}
     self.totalWords = 0
     entries = os.listdir(dirName)
     # count is the number of directory entries, not the number of words
     self.count = len(entries)
     for entry in entries:
         reader = ReadFile(dirName + '/' + entry)
         self._populateWordsDict(reader.getWords())
Esempio n. 4
0
def main():
    """Run each program of a directory, screenshot it and assemble a document.

    Command-line options:
      -d/--dir       directory holding the programs (mandatory)
      -t/--template  template document, defaults to "template.docx"
      -s/--subject   subject name passed to CreateDocument
      -n/--name      output file name, defaults to a timestamped name
      -pn/--pno      practical number, defaults to 1

    For every file returned by ``GetFiles(...).get_file()`` the program is
    executed, a screenshot is saved next to the working directory, the source
    is read back and the (aim, source, screenshot) triple is added to the
    document, which is saved at the end.

    Exits with status 1 when no directory is given.
    """
    # Local import: the file-level import block is not visible from here.
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--dir", help="Fetch files from given directory & execute")
    parser.add_argument("-t", "--template", help="Provide template file for the document")
    parser.add_argument("-s", "--subject", help="provide subject name override in document")
    parser.add_argument("-n", "--name", help="name for the output doc file")
    parser.add_argument("-pn", "--pno", help="Practical No ")
    args = parser.parse_args()

    if not args.dir:
        print("directory should be specified")
        # A missing directory is a usage error, so report failure to the shell
        # (the previous exit status 0 signalled success).
        sys.exit(1)

    if not args.template:
        args.template = "template.docx"

    if not args.pno:
        args.pno = 1
    if not args.name:
        args.name = "AnilSoni-" + str(datetime.now().strftime("%d-%m-%y %H-%M-%S")) + ".docx"

    files = GetFiles(args.dir)
    # The directory part of the result was never used; only the file list is needed.
    _, list_files = files.get_file()

    execute_program = ExecuteProgram()
    doc = CreateDocument(template=args.template, subject=args.subject, practicalno=args.pno)
    read_file_source = ReadFile()
    for pythonfile in list_files:
        execute_program.execute(path=args.dir, filename=pythonfile)
        # Give the launched program time to produce its output before capturing
        sleep(5)
        pic = screenshot()
        # Replace the last three characters of the file name with ".png"
        pic_name = pythonfile[0:len(pythonfile) - 3] + ".png"
        pic.save(pic_name)
        execute_program.terminate()
        # os.path.join adds the separator when --dir has no trailing one;
        # plain concatenation produced a wrong path in that case.
        source = read_file_source.read_file(filename=os.path.join(args.dir, pythonfile))
        source = "".join(source)
        print(execute_program.get_aim())
        doc.set_aim_source_output(aim=execute_program.get_aim(), source=source, output=pic_name)

        sleep(2)

    doc.save_doc(args.name)
Esempio n. 5
0
 def __init__(self):
     """Create the controller with empty paths, default flags and a fresh ReadFile."""
     # Collaborators: only the reader exists up front
     self.reader = ReadFile()
     self.indexer = None
     self.searcher = None

     # Paths start out empty
     self.main_path = ''
     self.posting_path = ''
     self.save_path = ''

     # Option flags
     self.to_stem = False
     self.with_semantics = False

     # Collected state
     self.languages = set()
     self.queries_docs_results = []
     self.avg_doc_length = 0
Esempio n. 6
0
def main():
    """Run ICP on the two .fuse point-cloud files and print the result.

    The first three items of the value returned by ``iterativeClosestPoint``
    are printed, each below its heading.
    """
    first_cloud = ReadFile('./point_cloud_registration/pointcloud1.fuse')
    second_cloud = ReadFile('./point_cloud_registration/pointcloud2.fuse')

    icp = ICP(first_cloud, second_cloud)
    icp.setThres(0.1)
    icp.setTol(0.01)

    outcome = icp.iterativeClosestPoint()
    headings = ('Rotation matrix', 'Translation vector', 'iteration No.')
    for position, heading in enumerate(headings):
        print(heading)
        print(outcome[position])
Esempio n. 7
0
class Semantics:
    """Collect tokenised documents from a corpus and query a saved Word2Vec model."""

    # Corpus location used when the caller does not supply one
    DEFAULT_CORPUS_PATH = 'D:\\Studies\\BGU\\semesterE\\IR\\IRProj\\SEproject\\corpus'

    def __init__(self, path=None):
        """Create the reader and an empty sentence list.

        :param path: corpus directory; defaults to ``DEFAULT_CORPUS_PATH``.
        """
        self.reader = ReadFile()
        self.path = self.DEFAULT_CORPUS_PATH if path is None else path
        self.sentences = []

    def read_corpus(self, max_dirs=3):
        """Append the tokenised text of each document to ``self.sentences``.

        :param max_dirs: number of corpus entries to read (default 3, the
            limit that was previously hard-coded); ``None`` reads them all.
        """
        # Slicing never runs past the end of the listing, so a corpus with
        # fewer than ``max_dirs`` entries no longer raises IndexError.
        for dir_name in os.listdir(self.path)[:max_dirs]:
            docs = self.reader.separate_docs_in_file(self.path, dir_name)
            for doc_id in docs:
                terms = gensim.utils.simple_preprocess(docs[doc_id].text)
                self.sentences.append(terms)
            # Drop the document objects once their tokens have been collected
            docs.clear()

    def start(self):
        """Load the model saved as 'model.bin' and print the words most similar to 'intelligence'."""
        # Training is disabled here; a previously saved model is loaded instead.
        new_model = Word2Vec.load('model.bin')
        w1 = 'intelligence'
        similar = new_model.wv.most_similar(positive=w1)
        # Function-call form prints the same text on Python 2 and also runs on Python 3
        print(similar)
Esempio n. 8
0
 def __init__(self, file_name, folds=5, splits=5,smote=True):
     """Store the run settings, seed the RNG and load ``file_name`` into a Data object.

     :param file_name: input file; a falsy value raises ``Exception``.
     :param folds: stored as ``self.folds``.
     :param splits: stored as ``self.splits``.
     :param smote: stored as ``self.smote_val``.
     :raises Exception: when ``file_name`` is empty or None.
     """
     super(Learner, self).__init__()
     self.smote_val = smote
     self.splits = splits
     self.folds = folds
     self.file_name = file_name
     if not file_name:
         raise Exception("Filename is required.")
     # Fixed seed so repeated runs draw the same random numbers
     seed(0)
     self.predict = None
     self.data = Data()
     self.result = Result()
     # ReadFile is handed the Data object; its return value is not kept
     ReadFile(file_name, self.data)
def main():
	"""Match probe points to links, then write the matches and the slopes.

	The optional first command-line argument is passed to ``readProbes`` as an
	integer; without it ``readProbes`` is called with no argument.
	"""
	print('Start reading links ...')
	link_reader = ReadFile('Partition6467LinkData.csv')
	link_map = link_reader.readLinks()
	print('Links reading complete ...')
	print('Start reading probe points ...')
	probe_reader = ReadFile('Partition6467ProbePoints.csv')
	if len(sys.argv) <= 1:
		print('Start reading %d probes ...' % 5000)

		probes = probe_reader.readProbes()
	else:
		print('Start reading %d probes ...' % int(sys.argv[1]))

		probes = probe_reader.readProbes(int(sys.argv[1]))

	sample_ids = probes.keys()
	matched = list()
	print('Probes reading complete ...')
	for sample_id in sample_ids:
		print('Analyzing probes from sample No.%d ...' % sample_id)
		# Match is built from one sample's probes and the link map
		matcher = Match(probes[sample_id], link_map)
		print('\tStart calculating candidate points ...')
		candidates = matcher.probeCandidates()
		print('\tCandidate points calculating complete ...')
		print('\tStart matching sequence ...')
		matched.extend(matcher.findMatchedSequence(candidates))
		print('\tSequence matching complete ...')

	print('Start writing matched sequence ...')
	writer = WriteFile()

	writer.writeMP(matched)
	print('Sequence writing complete ... ')
	print('Start calculating slopes ...')
	slope_calc = Slope(matched, link_map)
	slopes = slope_calc.calSlope()
	print('Slopes calculating complete ...')
	print('Start writing slopes ...')
	writer.writeS(slopes)
	print('Slopes writing complete ...')
 def __init__(self, file_path, target_location = None, regression = None, dates=None, days = None, isCars=False, ignore=None):
     """Load a data file and apply the requested conversions in place.

     :param file_path: file handed to ``ReadFile.read``.
     :param target_location: stored as-is; when set (and not regressing) the
         non-class data is min-max normalised.
     :param regression: when truthy every cell is converted to float.
     :param dates: column index whose values are mapped through ``self.date_values``.
     :param days: column index whose values are mapped through ``self.day_values``.
     :param isCars: when truthy, cells found in ``self.low_high_values`` are replaced.
     :param ignore: columns handed to ``self.removeColumns``.
     """
     self.target_location = target_location
     self.file_path = file_path
     self.regression = regression
     self.date_index = dates
     self.days_index = days
     self.data = ReadFile.read(self,file_path)

     # Drop the columns the caller asked to ignore
     if ignore is not None:
         self.data = self.removeColumns(ignore)

     # Categorical conversion table, applied only when isCars is set
     if isCars:
         for row_idx in range(len(self.data)):
             for col_idx in range(len(self.data[0])):
                 cell = self.data[row_idx][col_idx]
                 if cell in self.low_high_values:
                     self.data[row_idx][col_idx] = self.low_high_values[cell]

     # Date and day columns are mapped through their lookup tables
     if dates is not None:
         for row_idx in range(len(self.data)):
             raw_date = self.data[row_idx][dates]
             self.data[row_idx][dates] = self.date_values[raw_date]
     if days is not None:
         for row_idx in range(len(self.data)):
             raw_day = self.data[row_idx][days]
             self.data[row_idx][days] = self.day_values[raw_day]

     # Regression needs every cell as a float
     if regression:
         for row_idx in range(len(self.data)):
             for col_idx in range(len(self.data[0])):
                 self.data[row_idx][col_idx] = float(self.data[row_idx][col_idx])

     # Classification with a known target column: min-max scale the rest
     if not (target_location is None or regression):
         features = numpy.array(self.separateClassFromData())
         lowest = features.min()
         features = (features - lowest) / (features.max() - lowest)
         self.data = self.joinClassAndData(features)
Esempio n. 11
0
def mapping():
    """Read and allow the creation of the sub-pathway.

    GET requests and POST requests that fail form validation render
    'map.html'. A valid POST reads the search parameters from the form, takes
    the gene data either from an uploaded file or from a GenBank lookup by
    accession number, stores the resulting pathway under a new job number and
    redirects to the diagram view.

    Returns the rendered form, a redirect to ``diagram``, or the plain text
    "Not found" when the accession lookup or the gene search yields nothing.
    """
    form: InfoForm = InfoForm()
    # validate_on_submit() is only consulted for POST, as before
    if request.method != "POST" or not form.validate_on_submit():
        return render_template('map.html', form=form)

    # Read although currently unused: the send_email call below is disabled,
    # and a missing field should keep failing the request.
    email: str = request.form["email"]
    option = request.form['options']
    gene: str = request.form['gene']
    basepairs: int = int(request.form['basepairs'])
    accession_number: str = request.form['accession_number']
    ident: int = int(request.form['ident'])
    hit_size: int = int(request.form['hit_size'])
    expect: int = int(request.form['expect'])

    # Only a missing 'upload' part means "no file"; anything else is a real
    # error and is no longer swallowed by a bare except.
    try:
        gen_file = request.files['upload']
    except KeyError:
        gen_file = None

    if gen_file and allowed_file(gen_file.filename):
        filename: str = secure_filename(gen_file.filename)
        # Save and read back through the same path so both always agree
        saved_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        gen_file.save(saved_path)
        readFile = ReadFile(saved_path)
        # NOTE(review): this call omits hit_size and expect, unlike the
        # GenBank branch below - confirm that is intended.
        pathway = readFile.get_gene(option, gene, basepairs,
                                    ident / 100)
    else:
        filename = search.searchGenbank(accession_number)
        if not filename:
            return "Not found"
        readFile = ReadFile(filename)
        pathway = readFile.get_gene(option, gene, basepairs,
                                    ident / 100, hit_size, expect)

    # Equality (not truthiness) is kept on purpose: other falsy results such
    # as an empty list are still treated as a pathway, exactly as before.
    if pathway == False:
        return "Not found"

    job_number = uuid.uuid4()
    insert_job_number(job_number)
    insert_job_data(job_number, pathway)
    ## send_email(email, job_number)
    return redirect(url_for("diagram", job_number=job_number))
Esempio n. 12
0
    for pos in snake:
        pygame.draw.rect(screen, snake_color, (pos[1] * map_length / map_len + 1, pos[0] * map_length / map_len + 1,
                                               map_length / map_len, map_length / map_len), 0)
    pygame.display.update()


def draw_food(map_len, screen, food_pos, map_length):
    """Draw the food as a filled rectangle on ``screen``.

    :param map_len: divisor applied to ``map_length`` for positions and size.
    :param screen: surface passed to ``pygame.draw.rect``.
    :param food_pos: indexable pair; item 0 drives the y offset, item 1 the x offset.
    :param map_length: multiplier used together with ``map_len``.
    """
    food_color = (0, 166, 244)
    # Multiply before dividing, as in the position formula used for the snake
    left = food_pos[1] * map_length / map_len + 1
    top = food_pos[0] * map_length / map_len + 1
    cell_size = map_length / map_len
    pygame.draw.rect(screen, food_color, (left, top, cell_size, cell_size))


if len(sys.argv) > 0:
    # x = ReadFile(sys.argv[1], "map-out.txt")
    x = ReadFile("test.txt", "map-out.txt")
    t = x.read_file()

    food_fen = FoodGenerator()

    for m in range(0,11):
        init_state = GameMap(t, None, [0, 2], [[0, 2], [0, 1], [0, 0]], True)
        init_state_diagonal = GameMap(t, None, [0, 2], [[0, 2], [0, 1], [0, 0]], False)
        init_state.set_food_position(food_fen.generate_food(init_state.game_map))
        init_state_diagonal.set_food_position(food_fen.generate_food(init_state.game_map))

        movement_list_manhattan = []
        movement_list_diagonal = []
        for i in range(0, 30):
            # print "Score=", i
            mySearch = AStarSearch(init_state)
Esempio n. 13
0
    filePath = options.directory
    outputDir = options.outputDir
    variable = options.para
    # global xaxis;
    # xaxis = options.xaxis

    # if directory[len(directory)-1] != '/':
    # 	directory = directory + '/'

    # print("Scan:", directory)

    # raw
    global g_dicData
    g_dicInputData = {}

    readFile = ReadFile()
    statComp = StatisticComp()

    # g_dicInputData = readFile.readDire(filePath)
    g_dicInputData = readFile.readSDire(filePath)
    # g_dicInputData = readFile.readDireTimeseries(filePath)

    print(g_dicInputData)
    # if DEBUG:
    # 	pprint.pprint(g_dicInputData)
    # print(len(g_dicInputData['5']))
    # collectData(directory)

    # sys.exit()

    global g_dicMeanData
Esempio n. 14
0

def setErrors(data):
    """Flip one randomly chosen bit in every complete 7-character block.

    :param data: string of '0'/'1' characters. A trailing block shorter than
        7 characters is left untouched.
    :return: a string of the same length with exactly one character changed
        per complete block.
    :raises ValueError: if a selected character cannot be parsed by ``int``.
    """
    # Work on a list: rebuilding the whole string for every block made the
    # function quadratic in the input length.
    bits = list(data)
    for block_start in range(0, len(bits) - 6, 7):
        index = randint(block_start, block_start + 6)
        # Anything that parses to 0 becomes '1'; every other digit becomes '0'
        bits[index] = '1' if int(bits[index]) == 0 else '0'
    return ''.join(bits)


# Pipeline: read the text, source-encode it with Huffman, channel-encode it
# with Hamming, corrupt it, then decode it again and write both stages out.
# NOTE: the name `file` shadows the Python 2 builtin of the same name.
file = ReadFile('txt_files/Text.txt', analize=True)
source = Huffman(file.data)
encripted = source.encript(file.file_text)
channel = Hamming()
encripted = channel.encode(encripted)

# Inject errors: setErrors flips one bit in every complete 7-bit block
errorData = setErrors(encripted)
writeToFile('Encripted Text', errorData)

# Correct the errors with a fresh Hamming instance, then undo the Huffman coding
channel = Hamming()
trueData = channel.decode(errorData)
decripted = source.decript(trueData)
writeToFile('Decripted Text', decripted)
Esempio n. 15
0
 def __init__(self):
     """Create the reader, set the hard-coded corpus path and start with no sentences."""
     self.sentences = []
     # Corpus location is fixed to a directory on the author's machine
     self.path = 'D:\\Studies\\BGU\\semesterE\\IR\\IRProj\\SEproject\\corpus'
     self.reader = ReadFile()
Esempio n. 16
0
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import numpy
import struct
import dtw
from ReadFile import ReadFile
from record import recoder
from python_speech_features import mfcc
import scipy.io.wavfile as wav

# Open the ten .mfc files mfcc/mfc/1.mfc .. 10.mfc, one ReadFile per file.
f1 = ReadFile("mfcc/mfc/1.mfc")
f2 = ReadFile("mfcc/mfc/2.mfc")
f3 = ReadFile("mfcc/mfc/3.mfc")
f4 = ReadFile("mfcc/mfc/4.mfc")
f5 = ReadFile("mfcc/mfc/5.mfc")
f6 = ReadFile("mfcc/mfc/6.mfc")
f7 = ReadFile("mfcc/mfc/7.mfc")
f8 = ReadFile("mfcc/mfc/8.mfc")
f9 = ReadFile("mfcc/mfc/9.mfc")
f10 = ReadFile("mfcc/mfc/10.mfc")

# Query getN() on each file.
# NOTE(review): presumably N is the number of frames in the file - confirm in ReadFile.
N1 = f1.getN()
N2 = f2.getN()
N3 = f3.getN()
N4 = f4.getN()
N5 = f5.getN()
N6 = f6.getN()
N7 = f7.getN()
N8 = f8.getN()
N9 = f9.getN()
N10 = f10.getN()
Esempio n. 17
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/6/12 20:36
# @Author  : nkuhjp
# @Site    :
# @File    : PageRank.py
# @Software: PyCharm Community Edition

import time
from ReadFile import ReadFile
from BlockStripePR import BlockStripePR

if __name__ == '__main__':
    # 读取文件并分块
    start_time = time.time()
    readFile = ReadFile()
    readFile.get_statistics()
    filename = input('输入要读取的文件名(默认为WikiData.txt):')
    block_num = input('输入要划分的块(默认为15):')
    if not block_num.isdigit() or int(block_num) <= 0:
        block_num = 15
    else:
        block_num = int(block_num)
    readFile.read_file(filename, block_num)
    print('读取文件并分块用时: ', time.time() - start_time, '秒')

    # 根据前面的分块计算score
    start_time = time.time()
    blockStripePR = BlockStripePR(readFile.get_nodeId(),
                                  readFile.get_nodeNumber(),
                                  readFile.get_nodeList(),
Esempio n. 18
0
# --------------------------------------------------------------------------------------------------------------------*
# Author: AratioD @2016
# MULTIPLES OF X AND Y
# Data Structure: One Main program and three class files.
# Main class: MultiplesXY.py
# Data class file 1/3: ReadFile.py: Class: ReadFile
# Data class file 2/3: Calculate.py: Class: Calculate
# Data class file 3/3: WriteFile.py: Class: HypotenusePrintClass
# --------------------------------------------------------------------------------------------------------------------*

# IMPORT all four classes
# Each helper class is imported and instantiated right away; the instances
# are module-level names used by the main program below.
from ReadFile import ReadFile

readfile = ReadFile()

from Calculate import Calculate

calculate = Calculate()

from WriteFile import WriteFile

writefile = WriteFile()

# MAIN PROGRAM---------------------------------------------------------------------------------------------------------*

# Ask the reader for the input data file name (per the original note, the
# name is typed in at the keyboard and returned).
inputDatafileName = readfile.readInputFile()

# Count the rows of the input file
rowAmount = readfile.howManyLinesAreInTheInputFile(inputDatafileName)
Esempio n. 19
0
# Github project link: https://github.com/wasymshykh/python-code-analyser

from os import path
from ReadFile import ReadFile
from Calculate import Calculate

if __name__ == '__main__':
    
    # The analysed file is 'test.java', resolved relative to this script's
    # own directory rather than the current working directory
    file_name = 'test.java'
    file_path = path.join(path.dirname(path.abspath(__file__)), file_name)
    
    # Hand the file path to the reader
    r = ReadFile(file_path)

    # Analyse the list returned by get_lines_list_clean() and print the result
    c = Calculate(r.get_lines_list_clean())
    c.print_data()

Esempio n. 20
0
def _test(res=''):
    """Tune and evaluate every learner on one data set and pickle the results.

    The table is read from ``'../data/' + res + '.csv'``, split into positive
    and negative samples and, for each entry of the module-level ``learners``,
    evaluated over 15 reshuffled repetitions: differential evolution searches
    the parameters on the training part, then a ``Learner`` is run and its
    measures are collected.

    :param res: data set name, used for the input file, as the key of the
        result dictionary and for the output pickle
        ``'../dump/auc/' + res + '_auc1.pickle'``.
    """
    # Reset before every DE run; declared up front because ``global`` must
    # precede any use of the name inside the function.
    global max_fitness

    filepath = '../data/' + res + '.csv'

    # Fixed seeds for both random sources used below
    seed(1)
    np.random.seed(1)
    read = ReadFile(filepath)
    data, labels = read.build_table()

    split = split_two(np.array(data), np.array(labels))
    pos = np.array(split['pos'])
    neg = np.array(split['neg'])

    final = {}
    result = {}
    # Cut points are computed once with percentage=80 and reused for every repetition
    cut_pos, cut_neg = cut_position(pos, neg, percentage=80)
    measures = [
        "Recall", "Precision", "Accuracy", "F_score", "False_alarm", "AUC"
    ]
    for learner in learners:
        start_time1 = time.time()
        l = []
        x = {q: [] for q in measures}
        for folds in range(15):
            # shuffle() needs a mutable sequence: on Python 3 a bare range()
            # is immutable and raised TypeError here.
            pos_shuffle = list(range(0, len(pos)))
            neg_shuffle = list(range(0, len(neg)))
            shuffle(pos_shuffle)
            shuffle(neg_shuffle)
            pos = pos[pos_shuffle]
            neg = neg[neg_shuffle]
            data_train, train_label, data_test, test_label = divide_train_test(
                pos, neg, cut_pos, cut_neg)
            de = DE(F=0.7, CR=0.3, x='rand')

            max_fitness = 0
            # Initial population: ten candidates drawn from the module-level bounds
            pop = [[
                choice(bounds[0]),
                choice(bounds[1]),
                choice(bounds[2]),
                uniform(bounds[3][0], bounds[3][1])
            ] for _ in range(10)]

            v, score, final_para_dic = de.solve(main,
                                                pop,
                                                iterations=3,
                                                file=res,
                                                data_samples=data_train,
                                                target=train_label,
                                                learner=learner)

            model = Learner(samples=np.vstack((data_train, data_test)),
                            labels=train_label + test_label,
                            smote=True,
                            v=v,
                            percentage=80)
            model.run(learners=[learner])
            x["Accuracy"].append(model.get_accuracy()[0][1])
            x["F_score"].append(model.get_f_score()[0][1])
            x["Precision"].append(model.get_precision()[0][1])
            x["Recall"].append(model.get_recall()[0][1])
            x["False_alarm"].append(model.get_false_alarm()[0][1])
            x["AUC"].append(model.get_auc()[0][1])
            l.append([v, score, final_para_dic])
        print(x)
        # Per learner: measures, per-repetition DE outcomes, elapsed seconds
        result[learner] = [x, l, time.time() - start_time1]
    final[res] = result
    print(final)
    with open('../dump/auc/' + res + '_auc1.pickle', 'wb') as handle:
        pickle.dump(final, handle)
Esempio n. 21
0
__author__ = 'oupeng'
# -*- conding:utf8 -*-
from ReadFile import ReadFile
from ThreadOperation import ThreadOperation


if __name__=='__main__':

    rf=ReadFile("newUtilityRecords.txt")
    # Start from an empty work list so an invalid path no longer leads to a
    # NameError on eList / NumOfThread further down.
    eList=[]
    if rf.validatePath()==True:
        eList=rf.parseRow()
    NumOfThread=len(eList)

    # One ThreadOperation per parsed row. The object itself is kept for
    # join(); the result of start() is not the thread (threading.Thread.start
    # returns None), so appending it made every join() fail.
    threads=[]
    for i in range(0,NumOfThread):
        worker=ThreadOperation(eList[i])
        worker.start()
        threads.append(worker)
    for t in threads:
        t.join()

    # Keep the console open until the user confirms with 'y'
    answer=raw_input("please input y to exit")
    while(answer!='y'):
        answer=raw_input("please input y to exit")
Esempio n. 22
0
from Join_Ftr import *
from ReadFile import ReadFile
from GS_Face import GsFace

# Hard-coded locations of the feature directory and the two pair-list files
feature_pth = 'E:\\GPforFR\\data\\lfw_feature5'

instruc_pth_t = 'E:\\GPforFR\\data\\lfw_view1\\pairsDevTrain.txt'
instruc_pth_s = 'E:\\GPforFR\\data\\lfw_view1\\pairsDevTest.txt'

# Second argument of ReadFile.
# NOTE(review): meaning not visible here; it matches the 5 in 'lfw_feature5' - confirm.
num = 5

# X1: matched pairs followed by mismatched pairs from the train list
read_file = ReadFile(instruc_pth_t, num)
X1 = read_file.person_pair() + read_file.person_mispair()

# X2: the same for the test list (read_file is rebound to a new reader)
read_file = ReadFile(instruc_pth_s, num)
X2 = read_file.person_pair() + read_file.person_mispair()

gs_feature = Join_Ftr()

# Build (X, Y) from the feature directory for each pair list:
# X1 feeds the "tar" variables, X2 the "src" variables
Xtar, Ytar = gs_feature.Constrct_XY(feature_pth, X1)
Xsrc, Ysrc = gs_feature.Constrct_XY(feature_pth, X2)

Xt_in, Yt_in = gs_feature.XY_in(Xtar, Ytar)
Xs_in, Ys_in = gs_feature.XY_in(Xsrc, Ysrc)

# Only the X parts are handed to GsFace
gsface = GsFace(Xt_in, Xs_in)
Esempio n. 23
0
class Indexer:
    """Producer side of the indexing pipeline.

    Documents are read through ``ReadFile``, parsed with ``Parser`` and every
    term is pushed, together with its document id and frequency, onto the
    queue of the posting file chosen by the term's first two characters.
    ``Consumer`` threads drain those queues.
    """
    name_of_files = None  # Maps each posting file name to its queue
    file_type = None  # The file type of the posting files
    parser = None  # The parser of the project
    read_file = None  # The ReadFile of the class
    main_dictionary = None  # term -> [count, -1]
    temp_position_dic = None  # term -> pair of positions, [-1, -1] when new

    # The constructor of the class
    def __init__(self, corpus_file_path):
        """Create the queues and posting files, then the parser and the reader.

        :param corpus_file_path: value forwarded to the ReadFile constructor.
        """
        self.name_of_files = {}
        # One queue per two-letter prefix ('aa'..'zz') ...
        for i in range(0, 26):
            for j in range(0, 26):
                self.name_of_files["%s%s" %
                                   ((chr(ord('a') + i)),
                                    (chr(ord('a') + j)))] = Queue.Queue()
        # ... one per two-digit prefix ('00'..'99') ...
        for i in range(0, 10):
            for j in range(0, 10):
                self.name_of_files["%s%s" % (('%d' % i),
                                             ('%d' % j))] = Queue.Queue()
        # ... and one for everything else
        self.name_of_files['other'] = Queue.Queue()

        # The file type is txt
        self.file_type = 'txt'

        # Creating the initial posting files
        self.create_initial_posting_file()

        # Initializing the Parser
        self.parser = Parser()
        # Initializing the ReadFile
        self.read_file = ReadFile(corpus_file_path)

        # Initializing the dictionary
        self.main_dictionary = {}

        self.temp_position_dic = {}

    def add_to_dicts2(self):
        """Read, parse and enqueue documents until the reader is exhausted.

        Stops when the reader returns the text "all docs are received" or
        after 1000 documents, whichever comes first.
        """
        numOfDoc, text = self.read_file.getFile()
        coun = 0
        # Compare by value: identity ("is not") only matched the sentinel by
        # accident of string interning.
        while text != "all docs are received" and coun != 1000:
            coun += 1
            print(coun)

            text = self.find_sub("TEXT", text)
            dictionary_of_words, dictionary_of_unique_terms, max_freq = self.parser.parse_to_unique_terms(
                text)
            self.add_to_main_dictionary_spacial(dictionary_of_unique_terms,
                                                numOfDoc)
            for raw_term in dictionary_of_words:
                # The stored spelling may differ in case from the parsed one,
                # so the frequency is looked up with the parsed key.
                stored_term = self.add_term_to_dictionary(raw_term)
                self.add_term_to_queue(stored_term, numOfDoc,
                                       dictionary_of_words[raw_term])
            self.read_file.add_to_max_values_dict(max_freq, numOfDoc)
            numOfDoc, text = self.read_file.getFile()

    def index_files(self):
        """Start one Consumer per posting file, run the producer, then stop them all."""
        # Pass the bound method itself. Calling it here ran the whole producer
        # synchronously and handed its return value (None) to Thread.
        thread_add_to_dic = threading.Thread(target=self.add_to_dicts2)
        threads = []
        for name in self.name_of_files:
            thread = Consumer(self.name_of_files[name], name, self.file_type,
                              self.main_dictionary, 1, self.temp_position_dic)
            threads.append(thread)
            thread.start()
        thread_add_to_dic.start()
        thread_add_to_dic.join()
        # Ask every consumer to stop before waiting for any of them
        for thread in threads:
            thread.stop_thread()
        for thread in threads:
            thread.join()

    def add_to_main_dictionary_spacial(self, dict, doc_id):
        """Count every key of ``dict`` in the main dictionary and enqueue it.

        :param dict: iterable of terms (only the keys are used).
        :param doc_id: document the terms belong to.
        """
        for key in dict:
            if key in self.main_dictionary:
                self.main_dictionary[key][0] = self.main_dictionary[key][0] + 1
            else:
                self.main_dictionary[key] = [1, -1]
                self.temp_position_dic[key] = [-1, -1]

            self.add_term_to_queue(key, doc_id, self.main_dictionary[key][0])

    def add_term_to_queue(self, term, doc_id, tf):
        """Put ``(term, doc_id, tf)`` on the queue selected by the term's prefix.

        The queue name is the lower-cased first character followed by the
        lower-cased second character when both are letters or both are
        digits; otherwise the first character is doubled. Terms that do not
        start with a-z or 0-9 (including the empty string) go to 'other'.
        """
        # Slicing keeps an empty term from raising IndexError
        note = term[:1].lower()
        note2 = note
        flag1 = 'a' <= note <= 'z'
        flag2 = '0' <= note <= '9'
        if flag1 or flag2:
            if len(term) > 1:
                low = term[1].lower()
                if (('a' <= low <= 'z' and flag1)
                        or ('0' <= low <= '9' and flag2)):
                    note2 = low
            self.name_of_files["%s%s" % (note, note2)].put((term, doc_id, tf))
            return
        self.name_of_files['other'].put((term, doc_id, tf))

    # This function will create the initial posting files
    def create_initial_posting_file(self):
        """Create (or truncate) one empty posting file per queue name."""
        for name in self.name_of_files:
            # Opening for writing is enough; the context manager closes the
            # handle even if something goes wrong.
            with open("%s.%s" % (name, self.file_type), "w"):
                pass

    # This function will add a term to the dictionary
    def add_term_to_dictionary(self, term):
        """Count ``term`` in the main dictionary and return the stored spelling.

        * Already stored: its count is incremented.
        * Starts with A-Z and the lower-cased form is stored: that entry is
          incremented and the lower-cased form is returned.
        * Starts with a-z and the upper-cased form is stored: the entry is
          moved to ``term`` (count + 1) together with its position record.
        * Otherwise a new entry ``[1, -1]`` is created.
        """
        # If the term is already in the dictionary
        if term in self.main_dictionary:
            self.main_dictionary[term][0] = self.main_dictionary[term][0] + 1
            return term

        # Slicing keeps an empty term from raising IndexError
        first = term[:1]

        # If the term starts with a capital letter
        if 'A' <= first <= 'Z':
            lower = term.lower()
            # If the term is already in the dictionary in small letters
            if lower in self.main_dictionary:
                self.main_dictionary[lower][
                    0] = self.main_dictionary[lower][0] + 1
                return lower

        elif 'a' <= first <= 'z':
            upper = term.upper()
            # If the term is already in the dictionary in capital letters
            if upper in self.main_dictionary:
                self.main_dictionary[term] = [
                    self.main_dictionary[upper][0] + 1, -1
                ]
                # Carry over the position record of the upper-cased entry
                # (this used to read an undefined name and raised NameError).
                self.temp_position_dic[term] = [
                    self.temp_position_dic[upper][0],
                    self.temp_position_dic[upper][1]
                ]
                del self.temp_position_dic[upper]
                del self.main_dictionary[upper]
                return term
        # If the term is new in the dictionary
        self.main_dictionary[term] = [1, -1]
        self.temp_position_dic[term] = [-1, -1]
        return term

    # This function will return the string between two tags
    def find_sub(self, tag, string):
        """Return the text between ``<tag>`` and ``</tag>`` in ``string``.

        NOTE(review): when a tag is missing, ``str.find`` returns -1 and the
        slice bounds are computed from it unchanged - confirm callers always
        pass text that contains both tags.
        """
        start = "<" + tag + ">"
        end1 = "</" + tag + ">"
        string = string[string.find(start) + len(start):string.find(end1)]
        return string
Esempio n. 24
0
class Main:
    """Controller of the MVC model; runs the project.

    Holds the paths and flags chosen in the GUI and delegates the actual
    work to the Indexer, ReadFile, ParallelMain, Merge and Searcher
    components.
    """

    def __init__(self):
        """Initialize Main's properties with empty defaults."""
        self.main_path = ''
        self.posting_path = ''
        self.to_stem = False
        self.indexer = None
        self.reader = ReadFile()
        self.languages = set()
        self.searcher = None
        self.queries_docs_results = []
        self.avg_doc_length = 0
        self.with_semantics = False
        self.save_path = ''

    def start(self):
        """Build the index for the corpus under ``main_path``.

        Runs the parallel temp-posting creation, merges the per-directory
        results (documents, languages, posting file names and lines),
        computes the average document length, reads the corpus cities,
        merges the term dictionaries and finally hands everything to the
        indexer.
        """
        self.indexer = Indexer(self.posting_path)
        if self.to_stem:
            self.indexer.to_stem = True
        corpus_path = self.main_path + '\\corpus'
        dirs_list = os.listdir(corpus_path)
        # Create temp postings Multiprocessing
        dirs_dict = ParallelMain.start(self.main_path, self.posting_path,
                                       self.to_stem, dirs_list)

        # Merging dictionaries that were created by the processes.
        # Each value is indexed as: [0] posting file lines, [1] terms
        # dictionary, [2] documents dictionary, [3] languages.
        docs = {}
        files_names = []
        post_files_lines = []
        total_length = 0
        posting_prefix = "\\sPosting" if self.to_stem else "\\Posting"
        for directory in dirs_dict.keys():
            partial = dirs_dict[directory]
            tmp_docs_dict = partial[2]
            for doc_id in tmp_docs_dict:
                docs[doc_id] = tmp_docs_dict[doc_id]
                total_length += docs[doc_id].length
            for lang in partial[3]:
                self.languages.add(lang)
            for index, lines in enumerate(partial[0]):
                files_names.append(directory + posting_prefix + str(index))
                post_files_lines.append(lines)

        # An empty corpus would otherwise raise ZeroDivisionError here.
        self.avg_doc_length = total_length / len(docs) if docs else 0

        # Gets Cities that appear in the corpus
        for corpus_dir in dirs_list:
            self.reader.read_cities(corpus_path, corpus_dir)

        terms_dicts = [
            dirs_dict["\\Postings1"][1], dirs_dict["\\Postings2"][1],
            dirs_dict["\\Postings3"][1], dirs_dict["\\Postings4"][1]
        ]

        terms_dict = Merge.start_merge(files_names, post_files_lines,
                                       terms_dicts, self.posting_path,
                                       self.to_stem)

        self.indexer.docs_avg_length = self.avg_doc_length
        self.indexer.terms_dict = terms_dict
        self.indexer.docs_dict = docs
        self.indexer.index_cities(self.reader.cities)
        self.indexer.post_pointers(self.languages)

    def load(self):
        """Load previously saved index files into main memory.

        Creates a fresh Indexer, lets it load its data, then builds the
        Searcher from the loaded dictionaries and attaches the Word2Vec
        model stored as ``model.bin`` under the posting path.
        """
        self.indexer = Indexer(self.posting_path)
        if self.to_stem:
            self.indexer.to_stem = True
        self.languages = self.indexer.load()
        self.avg_doc_length = self.indexer.docs_avg_length
        self.searcher = Searcher(self.main_path, self.posting_path,
                                 self.indexer.terms_dict,
                                 self.indexer.cities_dict,
                                 self.indexer.docs_dict, self.avg_doc_length,
                                 self.to_stem, self.with_semantics)
        self.searcher.model = Word2Vec.load(self.posting_path + '//model.bin')

    def reset(self):
        """Erase everything under the posting path and drop the indexer.

        The posting directory is left in place but empty.  A posting path
        that does not exist yet is simply created instead of making
        ``shutil.rmtree`` fail.
        """
        if os.path.exists(self.posting_path):
            shutil.rmtree(self.posting_path)
        os.makedirs(self.posting_path)
        self.indexer = None

    def get_terms_dict(self):
        """Return the indexer's terms dictionary (shown by the GUI)."""
        return self.indexer.terms_dict

    def get_languages(self):
        """Return the languages of the corpus (shown by the GUI).

        NOTE(review): an older comment said this should return a string
        with the languages separated by '\\n'; it returns
        ``self.languages`` unchanged.
        """
        return self.languages

    def set_corpus_path(self, path):
        """Store the corpus path chosen in the GUI."""
        self.main_path = path

    def set_posting_path(self, path):
        """Store the posting path chosen in the GUI."""
        self.posting_path = path

    def set_stemming_bool(self, to_stem):
        """Store the stemming flag chosen in the GUI."""
        self.to_stem = to_stem

    def set_with_semantics(self, with_semantics):
        """Store the semantics flag and forward it to the searcher, if any.

        Before ``load()`` has run there is no searcher yet; the flag is
        then only stored, and ``load()`` passes it to the Searcher it
        creates.
        """
        self.with_semantics = with_semantics
        if self.searcher is not None:
            self.searcher.with_semantics = with_semantics

    def report(self):
        """Write term frequencies to ``frequency.txt`` and print statistics.

        The file lists ``term: value`` lines sorted ascending by the
        value stored at index 1 of each terms-dictionary entry.
        """
        num_count = 0
        freq = {}
        for term, entry in self.indexer.terms_dict.items():
            if Parse.isFloat(term):
                num_count += 1
            freq[term] = entry[1]

        freq_list = sorted(freq.items(), key=itemgetter(1))
        with open('frequency.txt', 'wb') as f:
            for term, value in freq_list:
                f.write(str(term) + ": " + str(value) + '\n')

        # Single-argument parenthesized form prints identically on
        # Python 2 and Python 3.
        print("Num of terms which are nums: " + str(num_count))
        print("Num of countries: " + str(len(self.indexer.countries)))
        print("Num of capitals: " + str(self.indexer.num_of_capitals))

    def set_save_path(self, dir_path):
        """Store the directory in which ``save()`` writes its results."""
        self.save_path = dir_path

    def save(self):
        """Append all collected query results to the results file.

        The file name is ``results.txt`` prefixed with one ``s`` for
        stemming and one ``s`` for semantics, inside ``save_path``.
        """
        prefix = ''
        if self.to_stem:
            prefix += 's'
        if self.with_semantics:
            prefix += 's'
        file_name = '\\' + prefix + 'results.txt'
        with open(self.save_path + file_name, 'a+') as f:
            for query_result in self.queries_docs_results:
                for doc in query_result[2]:
                    line = " {} 0 {} 1 42.38 {}\n".format(
                        query_result[0], doc[0], 'rg')
                    f.write(line)

    def get_cities_list(self):
        """Return the indexed cities, or None when there is no indexer."""
        if self.indexer is None:
            return None
        return self.indexer.cities_dict.keys()

    def start_query_search(self, query, chosen_cities):
        """Run a single query through the searcher and return its result."""
        return self.searcher.search(query, chosen_cities)

    def start_file_search(self, queries_path_entry, chosen_cities):
        """Run every query found in a queries file.

        A query is built from the ``<title>`` line plus all lines after
        ``<desc>`` up to (not including) the ``<narr>`` line, and is
        identified by the text after ':' on the ``<num>`` line.

        :return: ``self.queries_docs_results`` -- every
            ``(id, query, docs_result)`` tuple collected so far, including
            those from earlier calls.
        """
        queries_list = []
        with open(queries_path_entry, 'rb') as f:
            lines = f.readlines()

        query_id = 0
        query = ''
        i = 0
        while i < len(lines):
            line = lines[i]
            if '<num>' in line:
                query_id = line.split(':')[1].replace('\n', '')
            elif '<title>' in line:
                query = line.replace('<title>', '').replace('\n', '')
            elif '<desc>' in line:
                i += 1
                # Bounded so a file without <narr> cannot index past the
                # last line.
                while i < len(lines) and '<narr>' not in lines[i]:
                    query = '{} {}'.format(
                        query, lines[i].replace('<title>',
                                                '').replace('\n', ''))
                    i += 1
                queries_list.append((query_id, query))
            i += 1

        for query_tuple in queries_list:
            docs_result = self.start_query_search(query_tuple[1],
                                                  chosen_cities)
            self.queries_docs_results.append(
                (query_tuple[0], query_tuple[1], docs_result))
        return self.queries_docs_results

    def get_doc_five_entities(self, doc_id):
        """Return the ``five_entities`` attribute of the given document."""
        return self.searcher.docs_dict[doc_id].five_entities
Esempio n. 25
0
from Controller import Controller
from CreateQuestions import CreateQuestions
from ReadFile import ReadFile
from View import View

# Start the MVC application first.
c = Controller()
v = View(c)
v.start()

# Then generate questions from the sample text and print them.
readfile = ReadFile()
createQuestions = CreateQuestions()
# The context manager closes the handle even if reading fails.
with open("./text.txt", 'r') as text_file:
    file_text = text_file.read()
file_text = readfile.parse(file_text)
questions = createQuestions.create_questions(file_text)
for q in questions:
    print(q)
    for answer in questions.get(q):
        print(answer)
 def file_system(self):
     """Run the interactive file-system menu until option 13 is chosen.

     Options 1 and 2 create a folder/file in the current working
     directory and register its address in ``folder_location`` /
     ``file_location``; the remaining options look names up in those
     registries and pass the address to the matching helper.  A name
     that was never registered is reported and the menu continues,
     instead of a KeyError ending the loop.
     """

     def lookup(registry, name, kind):
         # Resolve a registered name to its address; tell the user and
         # return None when the name is unknown.
         try:
             return registry[name]
         except KeyError:
             print("\nUnknown {}: {}".format(kind, name))
             return None

     while True:
         # Features.feature_list(self)
         choice = input("\nChoose Your Option: ")
         if choice == '1':
             folder_name = input("\nEnter The Folder Name: ")
             folder_address = os.path.join(os.getcwd(), folder_name)
             CreateFolder.folder(self, folder_address)
             folder_location[folder_name] = folder_address
         elif choice == '2':
             file_name = input("\nEnter The File Name: ")
             file_address = os.path.join(os.getcwd(), file_name)
             CreateFile.create_file(self, file_address)
             file_location[file_name] = file_address
         elif choice == '3':
             # Keep asking until a valid move type is chosen; an unknown
             # name returns to the main menu.
             while True:
                 print("Where You Want To Move?")
                 print("(1) File To Folder")
                 print("(2) Folder To Folder")
                 move = input("\nEnter Your Choice: ")
                 if move == '1':
                     source = input("\nEnter The Source File Name: ")
                     source_address = lookup(file_location, source, "File")
                     if source_address is None:
                         break
                     destination = input(
                         "\nEnter The Destination Folder Name: ")
                     destination_address = lookup(folder_location,
                                                  destination, "Folder")
                     if destination_address is None:
                         break
                     AddDirectory.add(self, source_address,
                                      destination_address)
                     file_location[source] = os.path.join(
                         destination_address, source)
                     break
                 elif move == '2':
                     source = input("\nEnter The Source Folder Name: ")
                     source_address = lookup(folder_location, source,
                                             "Folder")
                     if source_address is None:
                         break
                     destination = input(
                         "Enter The Destination Folder Name: ")
                     destination_address = lookup(folder_location,
                                                  destination, "Folder")
                     if destination_address is None:
                         break
                     AddDirectory.add(self, source_address,
                                      destination_address)
                     folder_location[source] = os.path.join(
                         destination_address, source)
                     break
                 else:
                     print("Invalid Input, Type 1 or 2")
         elif choice == '4':
             while True:
                 print("(1) Write in a New File")
                 print("(2) Write in an Existing File")
                 write_choice = input("\nChoose Your Option: ")
                 if write_choice == '1':
                     target = input("Enter The File Name: ")
                     file_address = os.path.join(os.getcwd(), target)
                     content = input(
                         "\nEnter The Content You Want To Write: ")
                     WriteFile.write(self, file_address, content)
                     file_location[target] = file_address
                     break
                 elif write_choice == '2':
                     target = input("\nEnter The File Name: ")
                     file_address = lookup(file_location, target, "File")
                     if file_address is None:
                         break
                     content = input(
                         "\nEnter The Content You Want To Write: ")
                     WriteFile.write(self, file_address, content)
                     break
                 else:
                     print("Invalid Input, Type 1 or 2")
         elif choice == '5':
             target = input("\nEnter The File Name: ")
             file_address = lookup(file_location, target, "File")
             if file_address is not None:
                 ReadFile.read(self, file_address)
         elif choice == '6':
             target = input("\nEnter File Name: ")
             file_address = lookup(file_location, target, "File")
             if file_address is not None:
                 ClearFile.clear(self, file_address)
         elif choice == '7':
             target = input("\nEnter File Name: ")
             file_address = lookup(file_location, target, "File")
             if file_address is not None:
                 ContentLength.get_length(self, file_address)
         elif choice == '8':
             target = input("\nEnter Folder Name: ")
             folder_address = lookup(folder_location, target, "Folder")
             if folder_address is not None:
                 FolderSize.folder_size(self, folder_address)
         elif choice == '9':
             target = input("\nEnter Folder Name: ")
             folder_address = lookup(folder_location, target, "Folder")
             if folder_address is not None:
                 FolderFiles.file_list(self, folder_address)
         elif choice == '10':
             target = input("\nEnter Folder Name: ")
             folder_address = lookup(folder_location, target, "Folder")
             if folder_address is not None:
                 WalkTree.tree(self, folder_address)
         elif choice == '11':
             print(os.getcwd())
         elif choice == '12':
             FolderFiles.file_list(self, os.getcwd())
         elif choice == '13':
             print("\nGood Bye. See you Soon")
             # exit() is a helper injected by the site module; raising
             # SystemExit directly ends the program without relying on it.
             raise SystemExit
         else:
             print("\nInvalid Choice")
    # NOTE(review): this is the body of a function whose `def` line is not
    # part of this snippet; `options`, `DEBUG` and `meanCompute` come from
    # outside it.

    # Input location and output directory taken from the parsed options.
    filePath = options.directory
    outputDir = options.outputDir

    # The x-axis setting is published as a module-level global.
    global xaxis
    xaxis = options.xaxis

    # if directory[len(directory)-1] != '/':
    # 	directory = directory + '/'

    # print("Scan:", directory)

    # raw
    # NOTE(review): `g_dicData` is declared global but the name assigned
    # below is `g_dicInputData`, which therefore stays local -- confirm
    # which name was intended.
    global g_dicData
    g_dicInputData = {}

    readFile = ReadFile()

    # Replaces the empty dict above with whatever collectData returns.
    g_dicInputData = readFile.collectData(filePath)

    if DEBUG:
        pprint.pprint(g_dicInputData)
    # collectData(directory)

    # Module-level dict passed to meanCompute together with the input data;
    # presumably filled in place by it -- TODO confirm.
    global g_dicMeanData
    g_dicMeanData = {}

    meanCompute(g_dicInputData, g_dicMeanData)

    # Module-level list, reset to empty here.
    global g_cdfData
    g_cdfData = []
Esempio n. 28
0
import WordOperations
from ReadFile import ReadFile

if __name__ == '__main__':
    print("Welcome To Hangman!")
    print("> The Computer Will Guess Your word using your hints.")

    # read file contents to list
    ReadFile()

    def getCount():
        """Ask for the word length until the answer is a number in 4-7.

        Returns the accepted answer exactly as typed (a string).  The
        previous recursive retry discarded the retried answer and
        returned the first, invalid one.
        """
        while True:
            count = input(
                "Hint 1 - What is The Word count? (Must be between 4-7!): ")
            try:
                if 4 <= int(count) <= 7:
                    return count
            except ValueError:
                # Non-numeric input is treated like an out-of-range number.
                pass
            print("Number must be within the range 4-7")

    WordOperations.RemoveExtraByRange(getCount())
    WordOperations.RemoveExtraByFLetter(
        input("Hint 2 - Whats the First Letter of the word?: "))
    WordOperations.RemoveExtraByLLetter(
        input("Hint 3 - Whats the Last Letter of the word?: "))
    WordOperations.RemoveExtraBy2L()
    WordOperations.RemoveExtraBy3L()
Esempio n. 29
0
from ReadFile import ReadFile
import pprint
# Single ReadFile instance used for every step below.
reader = ReadFile()

# Read each input file into a list.
pib_rows = reader.list_file('pib.txt')
region_rows = reader.list_file('regioes.txt')

# --- Total PIB (GDP) per region ---
region_totals = reader.somatoria(reader.sum_regioes(region_rows, pib_rows))
print('Somatória do PIB por Regiões')
pprint.pprint(region_totals, indent=2)
print('------------------------')

# --- Percentage of PIB (GDP) per state ---
print('Porcentagem do PIB por Estados')
state_shares = reader.pib_states(pib_rows)
pprint.pprint(state_shares, indent=2)
        i += 1
    return r[200:]


# c = 1
# while c < 21:
#     u = str("rafliA" + str(c))
#     print("Processing... ", u)
#     a,b = ReadFile(u)
#     # print(len(a))
#     a = signalProcessing(a, 15, 20, 500)
#     b = signalProcessing(b, 15, 20, 500)
#     plotData2(a,b,15, str(str(u)+"Filter"), "")
#     c = c + 1

# The value 15 is shared by every step below; presumably the recording
# length in seconds -- TODO confirm.
SPAN = 15

# ReadFile yields the two signals that are processed side by side.
a, b = ReadFile("/ilham/data/ilham20keC1")
print("Processing..")
ap = int(len(a) / SPAN)
bp = int(len(b) / SPAN)

# Band-pass filter each signal; the last argument is derived from the
# length of the unfiltered signal.
a = FunctionOnlyone.bandpass_firwin_filter(a, 1200, 20, 500, len(a) / SPAN)
b = FunctionOnlyone.bandpass_firwin_filter(b, 1200, 20, 500, len(b) / SPAN)

# FFT of each filtered signal; fs is derived from the filtered length.
a = FunctionOnlyone.fastFourierTransform(
    a, SPAN, name="FFTilhamke20C1A", fs=len(a) / SPAN)
b = FunctionOnlyone.fastFourierTransform(
    b, SPAN, name="FFTilhamke20C1B", fs=len(b) / SPAN)
# a = FunctionOnlyone.welchFunction(a, 15)
# b = FunctionOnlyone.welchFunction(a, 15)
Esempio n. 31
0
from ReadFile import ReadFile
import preprocess
import dbn_model
import utils


# Resolve the project root and the two locations derived from it.
root = utils.get_root_path(False)
dataset_dir = root + "/NSL_KDD-master"
weights_dir = root + "/save"

# Load the data set and run the prediction preprocessing on it.
raw_data = ReadFile(dataset_dir).get_data()
data_pp = preprocess.Preprocess(raw_data).do_predict_preprocess()

# First run: a fresh DBN driven with action='yadlt'.
first_model = dbn_model.DBN(data_pp)
first_model.do_dbn(action='yadlt')

# Second run: another fresh DBN, this time with the weight matrix
# location under the project root.
second_model = dbn_model.DBN(data_pp)
do_dbn = second_model.do_dbn_with_weight_matrix(weights_dir)

print("[end]test_dbn")