Example #1
 def crawl(path_rules, search_rules=None, result_rules=None, output=None):
     '''
     Crawl directories starting at ``path_rules["start"]`` until the ``path_rules["max_depth"]`` depth is reached.
     Each directory is checked against ``path_rules`` and each file against ``path_rules["file"]``; ``search_rules`` is applied to files that pass.
     :param path_rules(dict): regex rules on file paths that include or exclude files/directories from the crawl
     :param search_rules(Optional[dict]): regex rules to search for patterns in files
     :param result_rules(Optional[dict]): regex rules to extract data from matched files
     :return (dict of str: dict): a dictionary keyed by file path whose values are the results of search_rules on that file
     '''
     result_by_file = {}
     if path_rules is None or "start" not in path_rules:
         return result_by_file
     root_depth = path_rules["start"].rstrip(os.path.sep).count(os.path.sep) - 1
     for dir_path, subdirList, fileList in os.walk(path_rules["start"]):
         current_depth = dir_path.count(os.path.sep) - root_depth
         if "max_depth" not in path_rules or path_rules["max_depth"] >= current_depth:
             for fname in fileList:
                 full_path = os.path.join(dir_path, fname)
                 if os.path.isfile(full_path) \
                         and ("file" not in path_rules or \
                                      FO.validate_string(full_path, path_rules["file"].get("include"), path_rules["file"].get("exclude"))):
                     result_by_file[full_path] = FO.validate_file(full_path, search_rules, result_rules)
             # Prune excluded directories in place so os.walk does not descend into them.
             # Iterate over a copy: removing items from the list being iterated would skip entries.
             for subdir in list(subdirList):
                 subdir_full_path = os.path.join(dir_path, subdir)
                 if FO.validate_string(subdir_full_path, path_rules.get("include"), path_rules.get("exclude")) is False:
                     subdirList.remove(subdir)
     if output is not None:
         Crawler.save_crawler_data(result_by_file, output)
     return result_by_file
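A minimal call might look like the sketch below. The rule dictionaries are hypothetical and only illustrate the keys the docstring mentions ("start", "max_depth", "file", "include", "exclude"); the exact schema is whatever FO.validate_string and FO.validate_file expect.

# Hypothetical rule dictionaries, for illustration only.
path_rules = {
    "start": "./test/test_inputs",        # root directory to crawl
    "max_depth": 3,                       # stop descending below this depth
    "exclude": [r"\.git"],                # regexes that prune whole directories
    "file": {"include": [r"\.txt$"]},     # regexes that select files to inspect
}

results = Crawler.crawl(path_rules)       # search_rules/result_rules left as None
for path, matches in results.items():
    print(path, matches)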
Example #2
 def crawl_multithread(path_rules, search_rules=None, result_rules=None, output=None, threads=None):
     '''Multithreaded version of :func:`Crawler.crawl`: one task per matching file is submitted to a thread pool.'''
     result_by_file = {}
     if path_rules is None or "start" not in path_rules:
         return result_by_file
     futures = {}
     with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
         root_depth = path_rules["start"].rstrip(os.path.sep).count(os.path.sep) - 1
         for dir_path, subdirList, fileList in os.walk(path_rules["start"]):
             current_depth = dir_path.count(os.path.sep) - root_depth
             if "max_depth" not in path_rules or path_rules["max_depth"] >= current_depth:
                 for fname in fileList:
                     full_path = os.path.join(dir_path, fname)
                     if os.path.isfile(full_path) \
                             and ("file" not in path_rules or \
                                          FO.validate_string(full_path, path_rules["file"].get("include"), path_rules["file"].get("exclude"))):
                         futures[executor.submit(FO.validate_file,full_path, search_rules, result_rules)] = full_path
                 # Prune excluded directories in place so os.walk does not descend into them.
                 # Iterate over a copy: removing items from the list being iterated would skip entries.
                 for subdir in list(subdirList):
                     subdir_full_path = os.path.join(dir_path, subdir)
                     if FO.validate_string(subdir_full_path, path_rules.get("include"), path_rules.get("exclude")) is False:
                         subdirList.remove(subdir)
     for future in concurrent.futures.as_completed(futures):
         file_result = futures[future]
         try:
             result_by_file[file_result] = future.result()
         except Exception as exc:
             logging.debug('%r generated an exception: %s',file_result, exc)
     if output is not None:
         Crawler.save_crawler_data(result_by_file, output)
     return result_by_file
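The directory walk itself stays single-threaded; only the FO.validate_file calls run concurrently, one submitted task per matching file. A hedged usage sketch, reusing the hypothetical path_rules from the note under Example #1:

# threads=None lets ThreadPoolExecutor pick its default worker count;
# pass an int to cap the pool size explicitly.
results = Crawler.crawl_multithread(path_rules, search_rules=None, threads=8)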
Example #3
def PreprocessData():
    # Create an object initialized to None
    pubmedarticlelists = None

    # Create FileOperations object
    fo = FileOperations()

    # parse the xml file
    p = Preprocessing()

    # If parsed file is present then load the file else parse the file
    if fo.exists(GV.parsedDataFile):
        pubmedarticlelists = p.LoadFile(GV.parsedDataFile)

    else:
        # Call the Parse method
        pubmedarticlelists, unsavedpmids = p.parse(GV.inputXmlFile)

        print(len(pubmedarticlelists))
        print(len(unsavedpmids))

        # Save the parsed data to a file
        fo.SaveFile(GV.parsedDataFile, pubmedarticlelists, mode='wb')
        fo.SaveFile(GV.unsavedPmidFile, unsavedpmids, mode='w')

        pubmedarticlelists = p.LoadFile(GV.parsedDataFile)

    del fo

    return pubmedarticlelists
Example #4
 def test_search_string(self):
     f = open("./test/test_inputs/test_search_string.txt", "r")
     content = f.read()
     f.close()
     self.assertTrue(FileOperations.validate_string(content, ["tes"]))
     self.assertTrue(FileOperations.validate_string(content, ["Google", "Analytics"]))
     self.assertFalse(FileOperations.validate_string(content, ["Google", "Analytecs"]))
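These assertions suggest that validate_string returns True only when every pattern in the include list is found. A standalone re-based sketch of that behaviour (an assumption about FileOperations, not its actual code; the exclude handling is guessed from how crawl calls it):

import re

def validate_string_sketch(text, include=None, exclude=None):
    # Hypothetical re-implementation: all include patterns must match
    # and no exclude pattern may match.
    if include and not all(re.search(p, text) for p in include):
        return False
    if exclude and any(re.search(p, text) for p in exclude):
        return False
    return True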
Example #5
def TokenizeDocs(docs, glossarylist, filename=GV.tokenizedDocumentD2VFile):
    tokenizeddocs = []
    combineddocuments = []
    fo = FileOperations()
    # tokenizer = RegexpTokenizer(r'\w+')
    if fo.exists(filename):
        # Load the file
        combineddocuments = fo.LoadFile(filename)
        pass

    else:
        tokenizer = MWETokenizer(glossarylist)
        regtokenizer = RegexpTokenizer(r'\w+')
        for doc in tqdm(docs):
            sentences = sent_tokenize(doc)

            tmp = []
            for sentence in sentences:
                tokens = tokenizer.tokenize(regtokenizer.tokenize(sentence.lower()))
                token_lowercase = [x.lower() for x in tokens]
                tmp.append(token_lowercase)
            tokenizeddocs.append(tmp)

        for doc in tqdm(tokenizeddocs):
            tokdoc = []
            for sent in doc:
                tokdoc.extend(sent)
            combineddocuments.append(tokdoc)

        # Save the file
        fo.SaveFile(filename, combineddocuments, mode='wb')

    del fo

    return combineddocuments
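The step worth noting above is the MWETokenizer, which re-joins multi-word glossary terms after the regex tokenizer has split them. A small self-contained illustration (the glossary entries are made up):

from nltk.tokenize import MWETokenizer, RegexpTokenizer

glossary = [("heart", "attack"), ("blood", "pressure")]   # hypothetical terms
tokenizer = MWETokenizer(glossary)
regtokenizer = RegexpTokenizer(r'\w+')

sentence = "High blood pressure raises heart attack risk."
print(tokenizer.tokenize(regtokenizer.tokenize(sentence.lower())))
# ['high', 'blood_pressure', 'raises', 'heart_attack', 'risk']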
Example #6
 def test_crawl_native_fakeCrawler(self):
     parameter_file = "./test/search_parameters.json"
     c = Crawler("SimpleTest", parameters=FileOperations.get_from_JSON_file(parameter_file))
     self.assertEqual(c.name, "SimpleTest")
     c.crawl_native()
     self.assertTrue(os.path.isfile(parameter_file))
     result_from_file = FileOperations.get_from_JSON_file(c.output["path"])
     self.assertEqual(len(result_from_file), 3)
Example #7
	def __init__(self, tempReadFile, tempSaveDirectory, tempSaveFilename, interval):
		self.temperatureReadFile = tempReadFile
		self.temperatureSaveFile = os.path.join(tempSaveDirectory,tempSaveFilename)
		self.sampleInterval = interval
		self.isRecording = False
		self.recordingLoopActive = False
		self.threadsStarted = False
		self.nextTimer = None
		self.tempLogger = FileOperations(tempSaveDirectory, tempSaveFilename)
Example #8
 def __init__(self, distReadFile, distSaveDirectory, distSaveFilename,
              interval, length):
     self.distanceReadFile = distReadFile
     self.distanceSaveFile = os.path.join(distSaveDirectory,
                                          distSaveFilename)
     self.sampleInterval = interval
     self.isRecording = False
     self.sampleLength = length
     self.recordingLoopActive = False
     self.threadsStarted = False
     self.nextTimer = None
     self.distLogger = FileOperations(distSaveDirectory, distSaveFilename)
Example #9
    def SaveSimilarDocuments(self, pubmedarticlelists, similardocfilename):
        pdocs = self.doc2vec_model.docvecs.doctag_syn0  # [:pts]

        # Get all the pmids
        pmids = self.doc2vec_model.docvecs.offset2doctag  # [:pts]

        # Create the similar documents dictionary for each pmid
        similardocdict = {}
        import pickle
        for idx, pmid in tqdm(enumerate(pmids)):
            # output the most similar documents for this pmid
            similardocdict[pmid] = self.doc2vec_model.docvecs.most_similar(
                pmid, topn=23752)
            similardocdict[pmid].insert(0, (pmid, '1.0'))

            #TODO New code
            if idx % 1000 == 0 or idx == 23753:
                with open('./saveddata/simdictpmid.pkl',
                          mode='a+b') as f:  # appending, not writing
                    pickle.dump(similardocdict, f)

                similardocdict = {}

            #TODO

        # { 'pmid1': {'Title':'Title', {Similar:[[id, 'title', score], [id, 'title', score], [id, 'title', score]]},
        #   'pmid2': {'Title':'Title', {Similar:[[id, 'title', score], [id, 'title', score], [id, 'title', score]]},
        #   ...
        # }

        similararticlesdict = {}
        for idx, pmid in tqdm(enumerate(pmids)):
            # Find current pmid title
            doctitle = pubmedarticlelists[pmid].ArticleTitle

            # Find similar documents pmids
            similardocpmids = similardocdict[pmid]

            similartitlescorelist = []

            # Iterate through all the pmids
            for id, score in similardocpmids:
                articletitle = pubmedarticlelists[id].ArticleTitle
                similartitlescorelist.append([id, articletitle, score])

            similararticlesdict[pmid] = {
                'Title': doctitle,
                'Similar': similartitlescorelist
            }

        # Save the similar documents
        fo = FileOperations()
        fo.SaveFile(similardocfilename, similararticlesdict)
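Because the dictionary is pickled to the same file in append mode every 1000 ids, reading it back requires repeated pickle.load calls until EOF. A sketch of such a reader (the path is the one used above; everything else is illustrative):

import pickle

chunks = []
with open('./saveddata/simdictpmid.pkl', 'rb') as f:
    while True:
        try:
            chunks.append(pickle.load(f))   # one dict per pickle.dump() call
        except EOFError:
            break

similardocdict = {}
for chunk in chunks:
    similardocdict.update(chunk)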
Example #10
def TokenizeDocsNew(docs, glossarylist, filename=GV.tokenizedDocumentD2VFile):
    tokenizeddocs = []
    combineddocuments = []
    fo = FileOperations()
    # tokenizer = RegexpTokenizer(r'\w+')
    if fo.exists(filename):
        # Load the file
        combineddocuments = fo.LoadFile(filename)
        pass

    else:
        tokenizer = MWETokenizer(glossarylist)
        regtokenizer = RegexpTokenizer(r'\w+')
        lmtzr = WordNetLemmatizer()
        stemmer = SnowballStemmer("english", ignore_stopwords=True)
        stop_words = stopwords.words('english')
        for doc in tqdm(docs):
            sentences = sent_tokenize(doc)

            tmp = []
            for sentence in sentences:
                # For each sentence in the sentences

                # Tokenize the sentence based on Regex and then using MWETokenizer
                tokens = tokenizer.tokenize(regtokenizer.tokenize(sentence.lower()))

                # Lower the case of all the tokens
                token_lowercase = [x.lower() for x in tokens]

                # Lemmatize the sentence. Find the POS tags and then lemmatize
                tokens_lowercase_tagged = nltk.pos_tag(token_lowercase)
                lemmatized_sentence = [lmtzr.lemmatize(wrd, pos=get_wordnet_pos(tag)) for wrd, tag in tokens_lowercase_tagged]

                # Stem the sentence
                stemmed_sentence = [stemmer.stem(wrd) for wrd in lemmatized_sentence]

                # Remove the stop words
                processed_sentence = [word for word in stemmed_sentence if word not in stop_words]

                tmp.append(processed_sentence)
            tokenizeddocs.append(tmp)

        for doc in tqdm(tokenizeddocs):
            tokdoc = []
            for sent in doc:
                tokdoc.extend(sent)
            combineddocuments.append(tokdoc)

        # Save the file
        fo.SaveFile(filename, combineddocuments, mode='wb')

    del fo

    return combineddocuments
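TokenizeDocsNew relies on a get_wordnet_pos helper that is not shown in this excerpt. A common way to write it, offered only as a guess at what the missing function does, maps Penn Treebank tags onto WordNet POS constants:

from nltk.corpus import wordnet

def get_wordnet_pos(treebank_tag):
    # Hypothetical helper: map Penn Treebank tags to WordNet POS constants,
    # defaulting to NOUN as WordNetLemmatizer itself does.
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    if treebank_tag.startswith('V'):
        return wordnet.VERB
    if treebank_tag.startswith('R'):
        return wordnet.ADV
    return wordnet.NOUN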
Example #11
    def __init__(self, turns, sheep_count, sheep_speed, wolf_speed, limit):
        self.turns = turns
        self.sheep_count = sheep_count
        self.sheep_speed = sheep_speed
        self.wolf_speed = wolf_speed
        self.limit = limit
        self.sheep_list = []
        self.wolf = Wolf(wolf_speed, self.sheep_list)
        self.turn = 1
        self.list_with_dictionaries = []

        FileOperations.create_csv()
Example #12
 def __init__(self, parent, app, size, title, style):
     wx.Frame.__init__(self, parent, id=-1, size=size, title=title, style=style)
     self.app=app
     self.Centre()
     self.initUI()
     self.fileoperation=FileOperations()
     self.Show()
Example #13
    def CreateTaggedDocuments(self, tokenizeddocs, ids):
        taggeddocuments = None
        fo = FileOperations()

        if fo.exists(GV.taggedDocumentFile):
            taggeddocuments = fo.LoadFile(GV.taggedDocumentFile)
        else:
            taggeddocuments = [
                gensim.models.doc2vec.TaggedDocument(s, [ids[i]])
                for i, s in tqdm(enumerate(tokenizeddocs))
            ]
            fo.SaveFile(GV.taggedDocumentFile, taggeddocuments, mode='wb')

        del fo

        return taggeddocuments
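The tagged documents are what gensim's Doc2Vec trainer consumes. A minimal, self-contained training sketch with a toy corpus, assuming a recent gensim release (vector_size/window/min_count naming) and the hyperparameters used by the Doc2VecModel call in Example #14:

import gensim

# Toy stand-in corpus; in the code above `taggeddocuments` comes from CreateTaggedDocuments.
docs = [
    gensim.models.doc2vec.TaggedDocument(["chest", "pain", "symptom"], ["pmid1"]),
    gensim.models.doc2vec.TaggedDocument(["blood_pressure", "treatment"], ["pmid2"]),
]
model = gensim.models.doc2vec.Doc2Vec(
    documents=docs,
    vector_size=200,   # num_features in the wrapper
    window=3,          # context_size
    min_count=1,       # kept at 1 so the toy corpus is not filtered away
    seed=1,
)
print(model.infer_vector(["chest", "pain"])[:5])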
Example #14
def Doc2Vec(docs, ids, glossarylist, pubmedarticlelists):

    # Tokenize all the docs
    tokenizeddocs = TokenizeDocs(docs, glossarylist, GV.tokenizedDocumentD2VFile)

    # Create Doc2Vec Model. Changing parameters will change the model name
    doc2vecmodel = Doc2VecModel(seed=1, num_features = 200, min_word_count = 2, context_size = 3)
    taggeddocuments = doc2vecmodel.CreateTaggedDocuments(tokenizeddocs, ids)
    model = doc2vecmodel.Model(taggeddocuments, ids)

    # Get model filename
    modelfile = doc2vecmodel.GetModelFileName()

    #Load the model
    model = doc2vecmodel.LoadModel(modelfile)

    # Save Similar Documents
    doc2vecmodel.SaveSimilarDocuments(pubmedarticlelists, GV.similarDocumentListFile)

    #Play
    similardocdict = FileOperations().LoadFile(GV.similarDocumentListFile)
    print(similardocdict['29794785']['Title'])
    print('---------------------------------------')
    for id, title, score in similardocdict['29794785']['Similar']:
        print(id, ' : ', title)

    doc2vecmodel.Visualize('29794785')
Example #15
 def test_crawl_multithread_mmcoreAsync(self):
     parameter_data = FileOperations.get_from_JSON_file("./test/search_async.json")
     crawlers = parameter_data["crawlers"]
     crawlerName = "dotAsync"
     c = Crawler(crawlerName, parameters=crawlers[crawlerName])
     data = c.crawl_native(threads=None)
     self.assertTrue(len(data) > 0)
     c.save_crawler_data(data, crawlers[crawlerName]["output"])
Example #16
    def menu(self):
        try:
            while True:
                opt = input("command: ").strip()
                if opt == "cl":
                    os.system("clear")
                    continue
                self.client.send(opt.encode("utf-8"))
                if opt == "e" or opt == "exit": break
                if opt == "dl" or opt == "ul":
                    filename = input("File to {}, leave blank to skip: ".format(opt)).strip()
                if opt == "dl":
                    if not filename:
                        self.client.send(" ".encode("utf-8"))
                        continue
                    self.client.send(filename.encode("utf-8"))
                    file_exists = self.client.recv(self.segement_size).decode("utf-8")
                    if file_exists == "1":
                        FileOps.recieve_file("{}/{}".format(self.SAVETO, filename), self.client, self.segement_size)
                        print("[+] File downloaded successfully")
                    else:
                        print(file_exists)
                    continue
                elif opt == "ul":
                    if not filename:
                        self.client.send(" ".encode("utf-8"))
                        continue
                    if FileOps.file_exists(filename):
                        self.client.send(filename.encode("utf-8"))
                        FileOps.send_file(filename, self.client, self.segement_size)
                        print("[+] File uploaded successfully")
                    else:
                        self.client.send(" ".encode("utf-8"))
                        print("[-] Unable to upload as the file could not be found: {}".format(filename))
                else:
                    reply = self.client.recv(self.segement_size).decode("utf-8")
                    print(reply)

        except OSError as err:
            print("[-] The client or server has caused the following error to occur:\n        {}".format(err))
        except KeyboardInterrupt:
            pass
        finally:
            self.client.close()
            print("[*] Client closed.")
            sys.exit(0)
Example #17
 def test_crawl_clientIntegrations(self):
     parameter_data = FileOperations.get_from_JSON_file("./test/search_integration.json")
     crawlers = parameter_data["crawlers"]
     crawlerName = "Integration"
     c = Crawler(crawlerName, parameters=crawlers[crawlerName])
     data = c.crawl_native()
     self.assertTrue(len(data) > 0)
     c.save_crawler_data(data, crawlers[crawlerName]["output"])
Example #18
def InitializeGlossary():

    # Create FileOperation object
    fo = FileOperations()

    # Initialize the two list to None
    glossarylist, synonymlist = [None]*2

    if fo.exists(GV.healthGlossaryFilePath):
        # Load the file from disk
        glossarylist, synonymlist = fo.LoadFile(GV.healthGlossaryFilePath) , fo.LoadFile(GV.synonymsFilePath)

    else:
        # Get all the glossary terms
        glossarylist, synonymlist = GetGlossaryTerms()

        # Save the glossary terms

        fo.SaveFile(GV.healthGlossaryFilePath, glossarylist, mode='wb')

        # Save the synonyms
        fo.SaveFile(GV.synonymsFilePath, synonymlist, mode='wb')

    del fo

    return glossarylist, synonymlist
Example #19
def tag_text():
    file_name = "./data/Restaurants_Train.xml"

    os.environ[
        'CLASSPATH'] = '/home/sol315/Downloads/stanford-postagger-2015-12-09/stanford-postagger.jar'
    os.environ[
        'STANFORD_MODELS'] = './models/english-left3words-distsim.tagger'

    fo = FileOperations(file_name)
    fo.get_xml()
    sentences = fo.get_sentences()

    st = StanfordPOSTagger('english-bidirectional-distsim.tagger')
    f = open('taged-' + file_name[7:-4] + '.json', 'a')
    cur = 0

    for line in sentences:
        cur += 1
        print cur, cur * 100 / len(sentences), '%'
        res = st.tag(line.split())
        json_tag = json.dumps(res)
        f.write(json_tag)
        f.write('\n')
Example #20
def SaveGlossary(glossarylist, synonymlist):
    fo = FileOperations()

    if fo.exists(GV.glossaryFilePath):
        return
    else:
        glossarylist, synonymlist = fo.LoadFile(GV.healthGlossaryFilePath), fo.LoadFile(GV.synonymsFilePath)
        synonymterm2 = set(tuple(term2) for term1, term2 in synonymlist)
        synonymterm2 = list((list(term) for term in synonymterm2))
        glossarylist += list(synonymterm2)
        fo.SaveFile(GV.glossaryFilePath, glossarylist, mode='wb')
    del fo
Example #21
    def simulate(self):
        for i in range(0, self.sheep_count):
            self.sheep_list.append(Sheep(i, self.sheep_speed, self.limit))
        while self.turn <= self.turns and self.sheep_left_check():
            for sheep in self.sheep_list:
                sheep.update()
            self.wolf.update()
            self.display()
            FileOperations.append_to_csv([self.turn, self.count_alive_sheep()])
            FileOperations.append_dictionary_to_list(
                self.list_with_dictionaries, self.create_dictionary())

            self.turn += 1
        FileOperations.create_json(self.list_with_dictionaries)
Example #22
File: monitor.py Project: VKEDCO/PYPL
#def getTimestamp():
#	return time.strftime('%Y-%m-%d_%H-%M-%S')


#utility method to set up GPIO used by sensors attached to the pi
#called at the beginning of __main__
def setupGPIO():
	os.system("sudo modprobe w1-therm")
	os.system("sudo modprobe w1-gpio")
	GPIO.setmode(GPIO.BCM)
	GPIO.setup(17, GPIO.IN, GPIO.PUD_UP)


#need to set up filepath and filename when we get config in __main__
#used for all classes and threads in this file for logging purposes
logger = FileOperations()

#Need to setup actual minFreeMB once we get config data in __main__
#used by recording threads to check if there is enough room on the pi to record data
storage = Storage()
storage.setLogger(logger)

#parses data in config file and returns a map of data entries to values
def readConfig():
	configDict = {}
	
	#finding and opening config file
	#parses config file with built in python config parser
	local_file_path = os.path.dirname(os.path.realpath(__file__)) + '/'
	config = ConfigParser.ConfigParser()
	config.readfp(open(local_file_path + 'config'))
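readConfig is truncated in this excerpt. For reference, a generic way to flatten a parsed ConfigParser object into a dict looks like the sketch below (Python 2, matching the imports above; this is a guess, not the project's actual code):

import os
import ConfigParser

def read_config_sketch():
    # Generic sketch only; monitor.py's real readConfig may differ.
    config_dict = {}
    local_file_path = os.path.dirname(os.path.realpath(__file__)) + '/'
    config = ConfigParser.ConfigParser()
    config.readfp(open(local_file_path + 'config'))
    for section in config.sections():
        for option in config.options(section):
            config_dict[option] = config.get(section, option)
    return config_dict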
Example #23
class utils:
    messageList = []
    chatMsgList = []
    DPPojoList = []
    WPPojoList = []
    errorMessage = ""

    def __init__(self):
        self.message = ""
        print "===> utils.py initiated"
        self.fileOperation = FileOperations()

    def write(self, fileLocation, text):
        self.fileOperation.write(fileLocation, text)

    def loadChatMsgFromFile(self, chatFileLocation):
        #         loading chat
        lines = self.fileOperation.read(chatFileLocation)
        for line in lines:
            self.chatMsgList.append(line)


#loads message.txt

    def loadMessages(self, msgFileName):
        lines = self.fileOperation.read(msgFileName)
        isAMessage = False
        message = ""
        lineNumber = 0
        for eachLine in lines:
            lineNumber += 1
            if eachLine.strip() == "" or eachLine.strip()[0] == '#':
                continue
            elif eachLine.strip().startswith(INSTRUCTION_START):
                if isAMessage:
                    d = PBI.PyBusyInfo("Your message in file ======> " +
                                       msgFileName + " is missing message end "
                                       '</message>'
                                       " /n")
                    wx.Yield()
                    time.sleep(3)
                    del d
                    quit()
                isAMessage = True
                msgPojo = messagePojo()
                id_ = self.getIdFromString(eachLine)
                if id_.isdigit() and len(id_) == 6:
                    msgPojo.setID(id_)
                else:
                    d = PBI.PyBusyInfo("Your message in file ======> " +
                                       msgFileName + " <======= lineNumber= " +
                                       str(lineNumber) + " given id " + id_ +
                                       " is not a valid id/n")
                    wx.Yield()
                    time.sleep(3)
                    del d
                    quit()
                if self.MsgStartIndex > 0 and not INSTRUCTION_END in eachLine:
                    message += eachLine[self.MsgStartIndex:]
            elif isAMessage and not INSTRUCTION_START in eachLine and not INSTRUCTION_END in eachLine:
                message += eachLine + "\n"
            if isAMessage and INSTRUCTION_END in eachLine:
                if self.MsgStartIndex > 0:
                    if (eachLine.strip().startswith(INSTRUCTION_START)):
                        message += eachLine[(self.MsgStartIndex):eachLine.
                                            index(INSTRUCTION_END)] + "\n"
                    elif len(eachLine[:eachLine.index(INSTRUCTION_END)].strip(
                    )) > 0:
                        message += eachLine[:eachLine.index(INSTRUCTION_END
                                                            )] + "\n"
                isAMessage = False
                msgPojo.setMsg(message)
                self.messageList.append(msgPojo)
                message = ""
        return self.messageList

    def getIdFromString(self, line):
        self.MsgStartIndex = -1
        isSpaceInBetwnNum = False
        if "id" in line and "=" in line:
            index = line.index("=")
            id_ = ""
            cnt = 0
            for chr_ in line[index:]:
                cnt += 1
                if chr_ == " ":
                    if len(id_) > 0:
                        isSpaceInBetwnNum = True
                    continue
                if chr_.isdigit():
                    if not isSpaceInBetwnNum:
                        id_ += chr_
                    else:
                        return "has invalid ID"
                elif chr_ == ">":
                    self.MsgStartIndex = index + cnt
                    break
            if (len(id_.strip()) == 6 and id_.strip().isdigit()):
                return id_.strip()
        else:
            return " has No ID"

    def readWPScript(self, WPFilename):
        lines = self.fileOperation.read(WPFilename)
        str_ = ""
        list_ = []
        errorMessage = ""
        threatEndMet = True
        neutralEndMet = True
        for eachLine in lines:
            if eachLine.strip() == "" or eachLine.strip()[0] == '#':
                continue
            else:
                text = eachLine.strip()
                if text.startswith(WORD_PROBE_START) and len(text) == len(
                        WORD_PROBE_START):
                    wppojo = WPPojo()
                elif text.startswith(BLOCK_START) and BLOCK_END in text:
                    blocknumber = eachLine[(
                        eachLine.index(BLOCK_START) +
                        len(BLOCK_START)):eachLine.index(BLOCK_END)].strip()
                    if blocknumber.isdigit():
                        wppojo.setBlockNumber(blocknumber)
                    else:
                        errorMessage += "the block number for word probe is not a number!!"
                elif text.startswith(
                        INSTRUCTION_BEFORE_BLOCK_START
                ) and INSTRUCTION_BEFORE_BLOCK_END in text:
                    listTxt = eachLine[(
                        eachLine.index(INSTRUCTION_BEFORE_BLOCK_START) +
                        len(INSTRUCTION_BEFORE_BLOCK_START)
                    ):eachLine.index(INSTRUCTION_BEFORE_BLOCK_END)].strip()
                    if not listTxt == None and len(listTxt) > 0:
                        list_ = listTxt.split(",")
                        if not list_ == None and len(list_) > 0:
                            wppojo.setBeforeBlockInstructionList(list_)
                elif text.startswith(WORD_PROBE_THREAT_WORD_START):
                    if WORD_PROBE_THREAT_WORD_END in text:
                        threatEndMet = True
                        str_ = eachLine[(
                            eachLine.index(WORD_PROBE_THREAT_WORD_START) +
                            len(WORD_PROBE_THREAT_WORD_START)
                        ):eachLine.index(WORD_PROBE_THREAT_WORD_END)].strip()
                        list_ = []
                        list_ = str_.split(",")
                        wppojo.setThreatWordList(list_)
                        list_ = []
                        str_ = ""
                    else:
                        str_ = eachLine[(
                            eachLine.index(WORD_PROBE_THREAT_WORD_START) +
                            len(WORD_PROBE_THREAT_WORD_START)):].strip()
                        threatEndMet = False
                elif not threatEndMet:
                    if WORD_PROBE_THREAT_WORD_END in text:
                        threatEndMet = True
                        str_ = str_ + eachLine[:eachLine.index(
                            WORD_PROBE_THREAT_WORD_END)].strip()
                        list_ = str_.split(",")
                        wppojo.setThreatWordList(list_)
                        list_ = []
                        str_ = ""
                    else:
                        str_ = str_ + eachLine.strip()
                        threatEndMet = False
                elif threatEndMet and text.startswith(
                        WORD_PROBE_NEUTRAL_WORD_START):
                    if WORD_PROBE_NEUTRAL_WORD_END in text:
                        neutralEndMet = True
                        str_ = eachLine[(
                            eachLine.index(WORD_PROBE_NEUTRAL_WORD_START) +
                            len(WORD_PROBE_NEUTRAL_WORD_START)
                        ):eachLine.index(WORD_PROBE_NEUTRAL_WORD_END)].strip()
                        list_ = str_.split(",")
                        wppojo.setNeutralWordList(list_)
                        list_ = []
                        str_ = ""
                    else:
                        str_ = eachLine[(
                            eachLine.index(WORD_PROBE_NEUTRAL_WORD_START) +
                            len(WORD_PROBE_NEUTRAL_WORD_START)):].strip()
                        neutralEndMet = False
                elif not neutralEndMet:
                    if WORD_PROBE_NEUTRAL_WORD_END in text:
                        neutralEndMet = True
                        str_ = str_ + eachLine[:eachLine.index(
                            WORD_PROBE_NEUTRAL_WORD_END)].strip()
                        list_ = str_.split(",")
                        wppojo.setNeutralWordList(list_)
                        list_ = []
                        str_ = ""
                    else:
                        str_ = str_ + eachLine.strip()
                        neutralEndMet = False
                elif neutralEndMet and text.startswith(
                        INSTRUCTION_AFTER_BLOCK_START
                ) and INSTRUCTION_AFTER_BLOCK_END in text:
                    listTxt = eachLine[(
                        eachLine.index(INSTRUCTION_AFTER_BLOCK_START) +
                        len(INSTRUCTION_AFTER_BLOCK_START)
                    ):eachLine.index(INSTRUCTION_AFTER_BLOCK_END)].strip()
                    if not listTxt == None and len(listTxt) > 0:
                        list_ = listTxt.split(",")
                        if not list_ == None and len(list_) > 0:
                            wppojo.setAfterBlockInstructionList(list_)
                elif text.startswith(WORD_PROBE_END):
                    if wppojo.isValid():
                        self.WPPojoList.append(wppojo)
                    else:
                        errorMessage = self.message + " \nnumber of words or its types or number of probes-position are unequal for block " + blocknumber

    def readAllScripts(self, DPfilename, WPFilename):
        #         readAllScripts from utils
        self.readDPScript(DPfilename)
        self.readWPScript(WPFilename)
        if len(self.errorMessage) > 0:
            dial = wx.MessageDialog(None, self.errorMessage, "abcd",
                                    wx.OK | wx.ICON_INFORMATION)
            dial.ShowModal()
            quit()

    def readDPScript(self, DPfilename):
        lines = self.fileOperation.read(DPfilename)
        str_ = ""
        list_ = []
        base_dir = os.path.dirname(__file__)
        path = os.path.dirname(base_dir) + "/dotProbe/images/"
        for eachLine in lines:
            if eachLine.strip() == "" or eachLine.strip()[0] == '#':
                continue
            else:
                text = eachLine.strip()
                if text.startswith(DOT_PROBE_START) and len(text) == len(
                        DOT_PROBE_START):
                    #start of each dot probe task block
                    dpPojo = DPPojo()
                elif text.startswith(BLOCK_START) and BLOCK_END in text:
                    blocknumber = eachLine[(
                        eachLine.index(BLOCK_START) +
                        len(BLOCK_START)):eachLine.index(BLOCK_END)].strip()
                    if blocknumber.isdigit():
                        dpPojo.setBlockNumber(int(blocknumber))
                    else:
                        self.errorMessage += "the block number for dot probe is not a number!!"
                elif text.startswith(
                        INSTRUCTION_BEFORE_BLOCK_START
                ) and INSTRUCTION_BEFORE_BLOCK_END in text:
                    listTxt = eachLine[(
                        eachLine.index(INSTRUCTION_BEFORE_BLOCK_START) +
                        len(INSTRUCTION_BEFORE_BLOCK_START)
                    ):eachLine.index(INSTRUCTION_BEFORE_BLOCK_END)].strip()
                    if not listTxt == None and len(listTxt) > 0:
                        list_ = listTxt.split(",")
                        if not list_ == None and len(list_) > 0:
                            dpPojo.setBeforeBlockMsgList(list_)
                elif text.startswith(UP_START) and UP_END in text:
                    str_ = eachLine[(
                        eachLine.index(UP_START) +
                        len(UP_START)):eachLine.index(UP_END)].strip()
                    list_ = str_.split(",")
                    self.message = ""
                    if not self.validateImageFile(path, list_):
                        if len(self.message) > 0:
                            self.errorMessage += self.message
                    else:
                        dpPojo.setUpImageList(list_)
                elif text.startswith(UP_TYPE_START) and UP_TYPE_END in text:
                    str_ = eachLine[(eachLine.index(UP_TYPE_START) +
                                     len(UP_TYPE_START)
                                     ):eachLine.index(UP_TYPE_END)].strip()
                    list_ = str_.split(",")

                    dpPojo.setUpImageType(list_)
                elif text.startswith(DOWN_START) and DOWN_END in text:
                    str_ = eachLine[(
                        eachLine.index(DOWN_START) +
                        len(DOWN_START)):eachLine.index(DOWN_END)].strip()
                    list_ = str_.split(",")
                    self.message = ""
                    if not self.validateImageFile(path, list_):
                        if len(self.message) > 0:
                            self.errorMessage += self.message
                    else:
                        dpPojo.setDownImageList(list_)
                elif text.startswith(
                        DOWN_TYPE_START) and DOWN_TYPE_END in text:
                    str_ = eachLine[(eachLine.index(DOWN_TYPE_START) +
                                     len(DOWN_TYPE_START)
                                     ):eachLine.index(DOWN_TYPE_END)].strip()
                    list_ = str_.split(",")
                    dpPojo.setDownImageType(list_)
                elif text.startswith(PROBE_START) and PROBE_END in text:
                    str_ = eachLine[(
                        eachLine.index(PROBE_START) +
                        len(PROBE_START)):eachLine.index(PROBE_END)].strip()
                    list_ = str_.split(",")
                    if not self.validateType(list_):
                        if len(self.message) > 0:
                            self.errorMessage = self.message + self.errorMessage
                    else:
                        dpPojo.setProbePosnList(list_)
                elif text.startswith(INSTRUCTION_AFTER_BLOCK_START
                                     ) and INSTRUCTION_AFTER_BLOCK_END in text:
                    listTxt = eachLine[(
                        eachLine.index(INSTRUCTION_AFTER_BLOCK_START) +
                        len(INSTRUCTION_AFTER_BLOCK_START)
                    ):eachLine.index(INSTRUCTION_AFTER_BLOCK_END)].strip()
                    if not listTxt == None and len(listTxt) > 0:
                        list_ = listTxt.split(",")
                        if not list_ == None and len(list_) > 0:
                            dpPojo.setAfterBlockMsgList(list_)
                elif text.startswith(DOT_PROBE_END):
                    if dpPojo.isValid():
                        self.DPPojoList.append(dpPojo)
                    else:
                        self.errorMessage = self.message + " \nnumber of images or its types or probes are unequal for block " + blocknumber

    def printDPPojoList(self):
        for DPPojo in self.DPPojoList:
            print DPPojo.toString()

    def printWPPojoList(self):
        for WpPojo_ in self.WPPojoList:
            print WpPojo_.toString()

    def validateImageFile(self, path, imageFileNameList):
        if len(imageFileNameList) <= 0:
            self.message = "no image file listed in the script\n"
            self.dialog.ShowMessage('Status Check', self.message)
            return False
        else:
            result = True
            for filename in imageFileNameList:
                if not (os.path.isfile(path + filename)):
                    self.message += path + filename + " is not a valid file.\n"
                    result = False
        return result

    def validateType(self, typeList):
        result = True
        if len(typeList) <= 0:
            self.message = "no probe type listed in the script\n"
        for eachType in typeList:
            if not (eachType.upper() == PROBE_DOWN
                    or eachType.upper() == PROBE_UP):
                self.message = "PROBE cannot be other than up or down.\n"
                result = False
        return result

    def getMessagePojoByID(self, num_id):
        for msg in self.messageList:
            if msg.getID() == num_id:
                return msg
        return None
Example #24
 def __init__(self):
     self.message = ""
     print "===> utils.py initiated"
     self.fileOperation = FileOperations()
Example #25
import json
from FileOperations import FileOperations 
from sklearn.naive_bayes import MultinomialNB
import scipy
import time

fo = FileOperations("../../input.json")
fo.get_json()
#fo.normalize()
#tokens = fo.tokenize()

#get the tf_idf data and label
split = fo.num_lines / 100 * 80
data = fo.get_tfidf()
label = fo.get_value()

#split the data to 80% and 20%
train_data = data[:split]
train_label = label[:split]
test_data = data[split:]
test_label = label[split:]

start = time.time()
clf_bayes = fo.train_bayes_model(train_data, train_label)
end = time.time()
print "Train Time:" + str(end - start) + 's'
start = time.time()
TP, FP, FN, TN = fo.score(clf_bayes, test_data, test_label)
end = time.time()
print "Test Time:" + str(end - start) + 's'
print "Accuracy: ", float(TP + TN) / float(TP + FP + FN + TN)
Example #26
 def test_validate_file(self):
     validate_result = FileOperations.validate_file("./test/test_inputs/test_search_string.txt", ["Google Analytics"],
                                                    {"CONTENT": {"registered": "'([\w\s]+)'"},
                                                     "built-in": ["DATE UPDATED"]})
     self.assertEqual(validate_result["registered"][1], "Site Catalyst")
     self.assertGreater(time.localtime(), time.strptime(validate_result["DATE UPDATED"]))
Example #27
 def test_crawl_fake_directCrawl(self):
     parameters = FileOperations.get_from_JSON_file("./test/search_parameters.json")
     Crawler.crawl(parameters["crawling"], parameters["rules"], parameters["result"], parameters["output"])
     self.assertTrue(os.path.isfile(parameters["output"]["path"]))
     result_from_file = FileOperations.get_from_JSON_file(parameters["output"]["path"])
     self.assertEqual(len(result_from_file), 3)
Example #28
 def test_crawl_native_minimalParameterFile_multithreaded_native(self):
     parameters = FileOperations.get_from_JSON_file("./test/minimal_parameters.json")
     data = Crawler.crawl_multithread(parameters["crawling"], parameters["rules"], parameters.get("result"))
     self.assertEqual(data['./test/test_inputs/minimalist_data.txt']['matches']['HasName']['city'][0], 'London')
Example #29
    def check_depend_then_ren_and_embed_original_metadata(
            self, append_faststart=True, artwork=False, copy_chapters=False):
        """This method will run the "check_dependencies_then_render" method and attempt to embed any artwork from the
		original file into the output (due to how ffmpeg works, the artwork can't always be copied in one command.)\n
		if artwork is True it will try to embed artwork from the input into the output specifically.
		This may happen if ffmpeg tries to output artwork to the first stream of an audio only file."""

        # Run standard command to render output.
        out_file_exists_result = self.check_depend_then_ren(
            append_faststart=append_faststart)

        if type(self.in_path) is list:
            in_meta_file = self.in_path[0]
        else:
            in_meta_file = self.in_path

        # If the output file exists then run the attempt_embed_metadata_silently method.
        if out_file_exists_result is True:
            # NOTE: This import is down here to avoid an infinite import.
            from FileOperations import FileOperations
            # This will attempt to embed any metadata (mainly for artwork) from the original file into the output.
            # (Due to how ffmpeg works, the artwork can't always be copied in one command.)
            # Create temporary output file with the original metadata embedded, delete the original output without the metadata,
            # and rename this temporary output to the desired output.
            for out_path in self.out_paths_list:
                temp_directory_to_embed_metadata = paths.Path().joinpath(
                    out_path.parent, '--temp_dir_to_embed_metadata_silently')
                paths.Path.mkdir(temp_directory_to_embed_metadata)
                temp_out_file = paths.Path().joinpath(
                    temp_directory_to_embed_metadata,
                    out_path.stem + out_path.suffix)
                FileOperations(out_path, temp_directory_to_embed_metadata,
                               False, self.print_ren_info, False,
                               False).copy_over_metadata(
                                   in_meta_file, copy_chapters)
                if temp_out_file.exists() is False:
                    if self.print_err is True:
                        print(
                            f'Error, input file to extract metadata silently from "{out_path}" not found.'
                        )
                    paths.Path(temp_directory_to_embed_metadata).rmdir()
                else:
                    out_path.unlink()
                    temp_out_file.rename(out_path)
                if artwork is True:
                    temp_art = FileOperations(
                        in_meta_file, temp_directory_to_embed_metadata, False,
                        self.print_ren_info, False, False).extract_artwork()
                    if temp_art is not False:
                        if temp_art.exists():
                            FileOperations(out_path,
                                           temp_directory_to_embed_metadata,
                                           False, self.print_ren_info, False,
                                           False).embed_artwork(temp_art)
                            temp_art.unlink()
                            out_path.unlink()
                            temp_out_file.rename(out_path)
                temp_directory_to_embed_metadata.rmdir()
                return True

        else:
            # A problem occurred while rendering and no output file was created so quit.
            return False
Example #30
class IndexGenerator:
    def __init__(self):
        self.fileOperation = FileOperations()

    # 0        1        2            3            4            5            6            7            8            9            10
    # S.N.    date    subject    trialcode    currentblock    number    up_type    down_type    trialtimeout    correct    latency
    def calculateindex(self, filename):
        self.DAFI = 0.0
        self.PAFI = 0.0
        N_UP = 0.0
        N_DOWN = 0.0
        D_UP = 0.0
        D_DOWN = 0.0
        P_UP = 0.0
        P_DOWN = 0.0
        #*****************************************************************************************#
        #*******************for N_up and N _down************************#
        self.neutral_neutralCount = 0
        self.neutral_neutral_case = False
        self.upProbeInNeutral_neutral = 0
        self.downProbeInNeutral_neutral = 0
        self.responseTimeOnUpProbe_inNeutral_neutral = 0.0
        self.responseTimeOnDownProbe_inNeutral_neutral = 0.0
        #*****************************************************************************************#
        #************************for D_up *****************************************#
        self.negative_up_anyCount = 0
        self.probeUp_inNegative_up = 0
        self.negative_up = False
        self.responseTimeOnProbe_Up_Negative_Up = 0.0
        #*****************************************************************************************#
        #************************for D_down *****************************************#
        self.negative_down_anyCount = 0
        self.probeDown_inNegative_down = 0
        self.negative_down = False
        self.responseTimeOnProbe_Down_Negative_Down = 0.0
        #*****************************************************************************************#
        #************************for P_up *****************************************#
        self.positive_Up_anyCount = 0
        self.ProbeUp_inPositive_up = 0
        self.positive_up = False
        self.responseTimeOnProbe_Up_Positie_Up = 0.0

        #*****************************************************************************************#
        #************************for P_up *****************************************#
        self.positive_Down_anyCount = 0
        self.probeDown_inPositie_down = 0
        self.positive_down = False
        self.responseTimeOnProbe_Down_Positive_Down = 0.0
        #*****************************************************************************************#
        lines = self.fileOperation.read(filename)
        data = []
        lineNumberToNeglectForTraining = 0
        lineNumber = 0
        for eachLine in lines:
            lineNumber += 1
            if eachLine.strip() == "" or eachLine.strip()[0] == '#':
                continue
            else:
                data = eachLine.split("\t")
                if len(data) > 12:
                    print "length >11"
                else:
                    if data[3].strip().upper(
                    ) == REPORT.TRIALCODE[FIXATION_INTRIALCODE]:
                        continue
                    if data[3].strip().upper(
                    ) == REPORT.TRIALCODE[PRACTISEIMAGE_INTRIALCODE]:
                        #neglect the training data
                        lineNumberToNeglectForTraining = lineNumber + 1
                        #if the trialcode=PRACTISE_PIC then we neglect upto the next line which contains the probe tracks
                        continue
                    if lineNumber <= lineNumberToNeglectForTraining:
                        continue

                    if self.positive_up:  #P_UP
                        self.positive_Up_anyCount += 1
                        self.positive_up = False

                        if int(data[9].strip()) == 1 and data[3].strip().upper(
                        ) == REPORT.TRIALCODE[UP_PROBE_INTRIALCODE]:
                            self.ProbeUp_inPositive_up += 1
                            self.responseTimeOnProbe_Up_Positie_Up += float(
                                data[10].strip()) * 1000

                    if self.positive_down:  #P_DOWN
                        self.positive_Down_anyCount += 1
                        self.positive_down = False
                        if int(data[9].strip()) == 1 and data[3].strip().upper(
                        ) == REPORT.TRIALCODE[dOWN_PROBE_INTRIALCODE]:
                            self.probeDown_inPositie_down += 1
                            self.responseTimeOnProbe_Down_Positive_Down += float(
                                data[10].strip()) * 1000

                    if self.negative_up:  #D_UP
                        self.negative_up_anyCount += 1
                        self.negative_up = False
                        if int(data[9].strip()) == 1 and data[3].strip().upper(
                        ) == REPORT.TRIALCODE[UP_PROBE_INTRIALCODE]:
                            self.probeUp_inNegative_up += 1
                            self.responseTimeOnProbe_Up_Negative_Up += float(
                                data[10].strip()) * 1000

                    if self.negative_down:  #D_DOWN
                        self.negative_down_anyCount += 1
                        self.negative_down = False
                        if int(data[9].strip()) == 1 and data[3].strip().upper(
                        ) == REPORT.TRIALCODE[dOWN_PROBE_INTRIALCODE]:
                            self.probeDown_inNegative_down += 1
                            self.responseTimeOnProbe_Down_Negative_Down += float(
                                data[10].strip()) * 1000

                    if self.neutral_neutral_case:  #N_UP and N_DOWN
                        self.neutral_neutral_case = False
                        if int(data[9].strip()) == 1 and data[3].strip().upper(
                        ) == REPORT.TRIALCODE[UP_PROBE_INTRIALCODE]:
                            self.neutral_neutralCount += 1
                            self.upProbeInNeutral_neutral += 1
                            self.responseTimeOnUpProbe_inNeutral_neutral += float(
                                data[10].strip()) * 1000
                        elif int(data[9].strip()) == 1 and data[3].strip(
                        ).upper() == REPORT.TRIALCODE[dOWN_PROBE_INTRIALCODE]:
                            self.neutral_neutralCount += 1
                            self.downProbeInNeutral_neutral += 1
                            self.responseTimeOnDownProbe_inNeutral_neutral += float(
                                data[10].strip()) * 1000
                        elif not (data[3].strip().upper()
                                  == REPORT.TRIALCODE[dOWN_PROBE_INTRIALCODE]
                                  or data[3].strip().upper()
                                  == REPORT.TRIALCODE[UP_PROBE_INTRIALCODE]):
                            print "this is due to incorrect data"

                    if data[6].strip().upper() == "NEUTRAL" and data[7].strip(
                    ).upper() == "NEUTRAL":  #N_UP and N_DOWN
                        self.neutral_neutral_case = True

                    if data[6].strip().upper() == "NEGATIVE":  #D_UP
                        self.negative_up = True

                    if data[7].strip().upper() == "NEGATIVE":  #D_DOWN
                        self.negative_down = True

                    if data[6].strip().upper() == "POSITIVE":  #P_UP
                        self.positive_up = True

                    if data[7].strip().upper() == "POSITIVE":  #P_DOWN
                        self.positive_down = True

        #*****************************************************************************************#
        if not self.neutral_neutralCount == (self.upProbeInNeutral_neutral +
                                             self.downProbeInNeutral_neutral):
            print "please verify the number of " + REPORT.TRIALCODE[
                UP_PROBE_INTRIALCODE] + " and " + REPORT.TRIALCODE[
                    dOWN_PROBE_INTRIALCODE] + " .Their sum do not match the total number of total neutral neutral"

        print "all the calculations are done by neglecting the incorrect responses i.e. if correct=0 then that data is neglected"
        print "#*****************************************************************************************#"
        print "total no. of CASE: UP_NEUTRAL_DOWN_NEUTRAL = " + str(
            self.neutral_neutralCount)
        print "\n#**********************N_UP***************************#\n"
        print "total no. of CASE: UP_PROBE in UP_NEUTRAL_DOWN_NEUTRAL = " + str(
            self.upProbeInNeutral_neutral)
        print "sum of response time  for CASE: UP_PROBE in UP_NEUTRAL_DOWN_NEUTRAL = " + str(
            self.responseTimeOnUpProbe_inNeutral_neutral)
        if not self.upProbeInNeutral_neutral == 0:
            N_UP = self.responseTimeOnUpProbe_inNeutral_neutral / self.upProbeInNeutral_neutral
        print "average response time for CASE: UP_PROBE in UP_NEUTRAL_DOWN_NEUTRAL = (N_UP) " + str(
            N_UP)
        print "\n#**********************N_DOWN***************************#\n"
        print "total no. of CASE:DOWN_PROBE in UP_NEUTRAL_DOWN_NEUTRAL = " + str(
            self.downProbeInNeutral_neutral)
        print "sum of response time  for CASE: DOWN_PROBE in UP_NEUTRAL_DOWN_NEUTRAL = " + str(
            self.responseTimeOnDownProbe_inNeutral_neutral)
        if not self.downProbeInNeutral_neutral == 0:
            N_DOWN = self.responseTimeOnDownProbe_inNeutral_neutral / self.downProbeInNeutral_neutral
        print "average response time for CASE: DOWN_PROBE in UP_NEUTRAL_DOWN_NEUTRAL (N_DOWN) = " + str(
            N_DOWN)
        print "*************************************************************************************************\n"
        print "\n#**********************D_UP***************************#\n"
        #         print "total no. of CASE: UP_NEGATIVE_DOWN_ANY = "+str(self.negative_up_anyCount)
        print "total no. of CASE: UP_PROBE in UP_NEGATIVE_DOWN_ANY = " + str(
            self.probeUp_inNegative_up)
        print "sum of response time  for CASE: UP_PROBE in UP_NEGATIVE_DOWN_ANY  = " + str(
            self.responseTimeOnProbe_Up_Negative_Up)
        if not self.probeUp_inNegative_up == 0:
            D_UP = self.responseTimeOnProbe_Up_Negative_Up / self.probeUp_inNegative_up
        print "average response time for CASE: UP_PROBE in UP_NEGATIVE_DOWN_ANY (D_UP)  = " + str(
            D_UP)
        print "\n#**********************D_DOWN***************************#\n"
        #         print "total no. of CASE: UP_ANY_DOWN_NEGATIVE = "+str(self.negative_down_anyCount)
        print "total no. of CASE: DOWN_PROBE in UP_ANY_DOWN_NEGATIVE = " + str(
            self.probeDown_inNegative_down)
        print "sum of response time  for CASE: DOWN_PROBE in UP_ANY_DOWN_NEGATIVE  = " + str(
            self.responseTimeOnProbe_Down_Negative_Down)
        if not self.probeDown_inNegative_down == 0:
            D_DOWN = self.responseTimeOnProbe_Down_Negative_Down / self.probeDown_inNegative_down
        print "average response time for CASE: DOWN_PROBE in UP_ANY_DOWN_NEGATIVE (D_DOWN) = " + str(
            D_DOWN)
        print "\n#**********************Distress Attentional Facilitation Index (Y)***************************#\n"
        self.DAFI = 0.5 * ((N_UP - D_UP) + (N_DOWN - D_DOWN))
        print "Distress Attentional Facilitation Index (Y) = " + str(self.DAFI)
        print "*************************************************************************************************\n"
        print "\n#**********************P_UP***************************#\n"
        #         print "total no. of CASE: UP_POSITIVE_DOWN_ANY = "+str(self.positive_Up_anyCount)
        print "total no. of CASE: UP_PROBE in UP_POSITIVE_DOWN_ANY = " + str(
            self.ProbeUp_inPositive_up)
        print "sum of response time  for CASE: UP_PROBE in UP_POSITIVE_DOWN_ANY  = " + str(
            self.responseTimeOnProbe_Up_Positie_Up)
        if not self.ProbeUp_inPositive_up == 0:
            P_UP = self.responseTimeOnProbe_Up_Positie_Up / self.ProbeUp_inPositive_up
        print "average response time for CASE: UP_PROBE in UP_NEGATIVE_DOWN_ANY (P_UP) = " + str(
            P_UP)
        print "\n#**********************P_DOWN***************************#\n"

        #         print "total no. of CASE: UP_ANY_DOWN_POSITIVE = "+str(self.positive_Down_anyCount)
        print "total no. of CASE: DOWN_PROBE in UP_ANY_DOWN_POSITIVE = " + str(
            self.probeDown_inPositie_down)
        print "sum of response time  for CASE: DOWN_PROBE in UP_ANY_DOWN_POSITIVE  = " + str(
            self.responseTimeOnProbe_Down_Positive_Down)
        if not self.probeDown_inPositie_down == 0:
            P_DOWN = self.responseTimeOnProbe_Down_Positive_Down / self.probeDown_inPositie_down
        print "average response time for CASE: DOWN_PROBE in UP_ANY_DOWN_POSITIVE (P_DOWN) = " + str(
            P_DOWN)
        print "\n#**********************Positive Attentional Facilitation Index (P)***************************#\n"
        self.PAFI = 0.5 * ((N_UP - P_UP) + (N_DOWN - P_DOWN))
        print "Positive Attentional Facilitation Index (P) = " + str(self.PAFI)
Example #31
 def __init__(self):
     self.fileOperation = FileOperations()
Example #32
 def test_crawl_native_minimalParameterFile_multithreaded(self):
     c = Crawler("MyMinimalCrawler", FileOperations.get_from_JSON_file("./test/minimal_parameters.json"))
     self.assertEqual(c.crawl_native(threads=10)['./test/test_inputs/minimalist_data.txt']['matches']['HasName']['city'][0], 'London')
Example #33
class DistanceReader:
    def __init__(self, distReadFile, distSaveDirectory, distSaveFilename,
                 interval, length):
        self.distanceReadFile = distReadFile
        self.distanceSaveFile = os.path.join(distSaveDirectory,
                                             distSaveFilename)
        self.sampleInterval = interval
        self.isRecording = False
        self.sampleLength = length
        self.recordingLoopActive = False
        self.threadsStarted = False
        self.nextTimer = None
        self.distLogger = FileOperations(distSaveDirectory, distSaveFilename)

    #starts recording distance at specified interval
    def startRecording(self):
        self.recordingLoopActive = True
        if (self.threadsStarted == False):
            threading.Timer(0, self.sampleDistanceWithInterval, ()).start()
            self.threadsStarted = True

    #requests the recording thread to stop
    def stopRecording(self):
        self.recordingLoopActive = False

    #used to force the thread to stop recording if there was an error in recording data
    def resetIsRecording(self):
        self.isRecording = False

    #this method is called by timer threads to record data at the specified interval
    def sampleDistanceWithInterval(self):
        #launching next timer thread to record temp after specified interval
        self.nextTimer = threading.Timer(self.sampleInterval,
                                         self.sampleDistanceWithInterval, ())
        self.nextTimer.start()
        if (self.recordingLoopActive == True and self.storage.hasSpace()):
            self.isRecording = True
            #line below ensures that, even when there is an error recording distance, isRecording won't stay on
            #the pi has sampleLength + 15 seconds to finish recording the distance
            threading.Timer(self.sampleLength + 15, self.resetIsRecording,
                            ()).start()
            timestamp = TimeUtils.getTimestamp()
            try:
                self.logger.log("[DistanceSensor] started recording distance")
                end_time = time.time() + self.sampleLength
                while time.time() < end_time:
                    distance = self.readDistance()
                    timestamp = TimeUtils.getTimestamp()
                    output = "%s %f\n" % (timestamp, distance)
                    self.distLogger.appendToFile(output)
                self.logger.log("[DistacneReaader] recorded distance")
            except Exception as e:
                self.logger.logError("DistanceReader",
                                     "Error reading distance", e)
            self.isRecording = False

    #triggers the sensor and returns a single measured distance; used by measure_average
    def measure(self):
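        #assumed HC-SR04-style wiring (not stated in the original): GPIO 24 drives the trigger pin, GPIO 23 reads the echo pin
        #a ~10 microsecond pulse on the trigger starts a measurement; the echo pin then stays high for the duration of the round trip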
        GPIO.output(24, True)
        time.sleep(0.00001)
        GPIO.output(24, False)
        start = time.time()

        while GPIO.input(23) == 0:
            start = time.time()

        while GPIO.input(23) == 1:
            stop = time.time()

        elapsed = stop - start
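        #convert the echo duration to centimetres: sound travels at roughly 34300 cm/s, and the pulse covers the distance twice (out and back), hence the division by 2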
        distance = (elapsed * 34300) / 2
        return distance

    #returns the average of two consecutive measurements for better accuracy; used by readDistance
    def measure_average(self):

        distance1 = self.measure()
        time.sleep(0.1)
        distance2 = self.measure()
        distance = distance1 + distance2
        distance = distance / 2
        return distance

    #this method reads the distance
    def readDistance(self):
        with open(self.distanceReadFile, 'r') as distanceFile:
            distance = self.measure_average()
            time.sleep(0.1)
            return distance

    #cancels any timers that are waiting to execute. Used when quitting the program
    def quit(self):
        if self.nextTimer != None:
            self.nextTimer.cancel()

    def setLogger(self, logger):
        self.logger = logger

    def setStorage(self, storage):
        self.storage = storage
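For context, a minimal way to drive this class looks roughly like the following; the paths, the interval and length values, and the logger/storage objects are placeholders, not values taken from the original project:

reader = DistanceReader("/dev/null", "/home/pi/data", "distance.log", interval=10, length=5)
reader.setLogger(logger)      # any object with log()/logError() methods
reader.setStorage(storage)    # any object with a hasSpace() method
reader.startRecording()
# ... record for a while ...
reader.stopRecording()
reader.quit()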
예제 #34
0
File: task3.py  Project: Song-Li/textMining
from FileOperations import FileOperations
import nltk
from nltk.tag.stanford import StanfordPOSTagger
from nltk.corpus import stopwords
import operator
import os
import re

# set the java environment variables:
# CLASSPATH is the path to the stanford-postagger.jar in your local disk
# STANFORD_MODELS is the path to the tagger file in your local disk
os.environ[
    'CLASSPATH'] = '/home/sol315/Downloads/stanford-postagger-2015-12-09/stanford-postagger.jar'
os.environ['STANFORD_MODELS'] = './models/english-left3words-distsim.tagger'

fo = FileOperations("taged.data")
tages = fo.get_taged_data()

origin = FileOperations("../input.json")
origin.get_json()

stop = set(stopwords.words('english'))

pairs = dict()
attributes = dict()
regex = re.compile('[^a-zA-Z]')

#this for loop is only used to collect the attributes for task 2
for line in tages:
    for tag in line:
        if tag[1] == 'NN' or tag[1] == 'NNS':
예제 #35
0
#def getTimestamp():
#	return time.strftime('%Y-%m-%d_%H-%M-%S')


#utility method to set up GPIO used by sensors attached to the pi
#called at the beginning of __main__
def setupGPIO():
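    #the two modprobe calls load the 1-wire kernel modules (w1-gpio and w1-therm) that expose 1-wire sensors such as a DS18B20 through the filesystem
    #GPIO.BCM selects Broadcom pin numbering; GPIO 17 is configured as an input with its internal pull-up resistor enabled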
    os.system("sudo modprobe w1-therm")
    os.system("sudo modprobe w1-gpio")
    GPIO.setmode(GPIO.BCM)
    GPIO.setup(17, GPIO.IN, GPIO.PUD_UP)


#need to set up filepath and filename when we get config in __main__
#used for all classes and threads in this file for logging purposes
logger = FileOperations()

#Need to setup actual minFreeMB once we get config data in __main__
#used by recording threads to check if there is enough room on the pi to record data
storage = Storage()
storage.setLogger(logger)


#parses data in config file and returns a map of data entries to values
def readConfig():
    configDict = {}

    #finding and opening config file
    #parses the config file with the built-in Python ConfigParser
    local_file_path = os.path.dirname(os.path.realpath(__file__)) + '/'
    config = ConfigParser.ConfigParser()
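The example is cut off at this point. With the standard-library ConfigParser instantiated above, the remainder of such a function typically reads the file and copies every option into the dictionary; the lines below are only a sketch under that assumption, and the 'config.ini' filename is a placeholder, not taken from the original:

    config.read(local_file_path + 'config.ini')
    for section in config.sections():
        for option in config.options(section):
            configDict[option] = config.get(section, option)
    return configDict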
예제 #36
0
 def test_save_to_CSV(self):
     FileOperations.save_dict_to_CSV({"first,row":{"first":1, "second":2, "third":3}}, "./test/test_outputs/save_csv.csv", {"First Col", "Second col", "Third col"})
     f = open("./test/test_outputs/save_csv.csv")
     self.assertIsNotNone(f)
     f.close()
예제 #37
0
from FileOperations import FileOperations
import nltk
from nltk.tag.stanford import StanfordPOSTagger
from nltk.corpus import stopwords
import operator
import os
import re

# set the java environment variables:
# CLASSPATH is the path to the stanford-postagger.jar in your local disk
# STANFORD_MODELS is the path to the tagger file in your local disk
os.environ[
    'CLASSPATH'] = '/home/sol315/Downloads/stanford-postagger-2015-12-09/stanford-postagger.jar'
os.environ['STANFORD_MODELS'] = './models/english-left3words-distsim.tagger'

fo = FileOperations("taged.data")
tages = fo.get_taged_data()

stop = set(stopwords.words('english'))

attributes = dict()
regex = re.compile('[^a-zA-Z]')

for line in tages:
    for tag in line:
        if tag[1] == 'NN' or tag[1] == 'NNS':
            tag[0] = regex.sub('', tag[0]).lower()
            if tag[0] in stop or len(tag[0]) <= 1:
                tag[1] = 'STOP'
            elif tag[0] in attributes:
                attributes[tag[0]] += 1
예제 #38
0
            if course.theme not in theme_list:
                theme_list.append(course.theme)

        return sorted(theme_list)


if __name__ == "__main__":

    from FileOperations import FileOperations

    data_handler = None

    print("Sample data\n")
    input_filename = "data.a"

    file_handler = FileOperations(input_filename)

    if (file_handler.status):
        print(file_handler.data, "\n")
        data_handler = DataHandler(file_handler.data)

        data_handler.print_courses_list()

    from Command import VALID_COMMANDS_REQ

    print("\nCommand: locations")
    command = ["locations"]
    data_handler.process_command(command)

    print("\nCommand: courses <location> <theme>")
    print("\nEg: 1")
예제 #39
0
 def test_getDataFromJSON(self):
     data = FileOperations.get_from_JSON_file("./test/search_parameters.json")
     self.assertIsNotNone(data)
     self.assertEqual(data["result"]["built-in"][2], "AUTHOR")
예제 #40
0
    nameOut = (name + "_Constraints")
    DOPobject.separateChildContent(constraintsDF,
                                   "Constraint Type",
                                   ret=0,
                                   name=nameOut)

    nameOut = (name + "_Products")
    DOPobject.processSingleDataset(productDF, nameOut)


from FileOperations import FileOperations
expPath = root + config["DEFAULT"]["ExportPath"]

#f = FileOperations("E:/CUA OpenBank API/OpenBanking/DataProcesing")
f = FileOperations(root)
i = 0

#for sheet in [3,4,5,6]:
sheets = [
    'TRANS_AND_SAVINGS_ACCOUNTS', 'CRED_AND_CHRG_CARDS', 'TERM_DEPOSITS',
    'TERM_DEPOSITS_RATES'
]

for sheet in sheets:
    if sheet == 'TERM_DEPOSITS_RATES':
        rates = SP(dataFile, sheet)
        rates.log = log
        rates.path = exportPath + "/"
        rates.createDict()
예제 #41
0
def main(argv):
    BaseBackupArch = ''
    Directory = ''
    DatabaseUser = ''
    DatabaseGroup = ''
    PostgresConfig = ''
    PostgresPort = ''
    try:
      opts, args = getopt.getopt(argv,"ha:d:u:g:c:p:",["archive=","directory=","user="******"group=","config=","port="])
    except getopt.GetoptError:
      print '%s -a <basebackup tar archive> -d <new PGDATA directory> [-u <postgres user, default postgres> -g <postgres group, default postgres>, -c <path to postgres config, default in new PGDATA dir> -p <port for new pg_cluster, default 5433>]' % os.path.abspath(__file__)
      sys.exit(2)
    if len(opts) == 0:
      print '%s -a <basebackup tar archive> -d <new PGDATA directory> [-u <postgres user, default postgres> -g <postgres group, default postgres>, -c <path to postgres config, default in new PGDATA dir> -p <port for new pg_cluster, default 5433>]' % os.path.abspath(__file__)
      sys.exit(2)
    for opt, arg in opts:
      if opt == '-h':
         print '%s -a <basebackup tar archive> -d <new PGDATA directory> [-u <postgres user, default postgres> -g <postgres group, default postgres>, -c <path to postgres config, default in new PGDATA dir> -p <port for new pg_cluster, default 5433>]' % os.path.abspath(__file__)
         sys.exit()
      elif opt in ("-a", "--archive"):
         BaseBackupArch = arg
      elif opt in ("-d", "--directory"):
         Directory = arg
      elif opt in ("-u", "--user"):
         DatabaseUser = arg
      elif opt in ("-g", "--group"):
         DatabaseGroup = arg
      elif opt in ("-c", "--config"):
         PostgresConfig = arg
      elif opt in ("-p", "--port"):
         PostgresPort = arg

    if (os.path.isfile(BaseBackupArch) and os.path.exists(Directory)):
      if (os.path.isabs(BaseBackupArch) and os.path.isabs(Directory)):
        ExtractFile(BaseBackupArch, Directory)

        logging.info('Successfully extracted basebackup archive file: %s into directory: %s' % (BaseBackupArch, Directory))
        fop = FileOperations(Directory)

        if DatabaseUser and DatabaseGroup:
          fop.setFileOwner(DatabaseUser, DatabaseGroup)
          fop.setFilePerm()
        elif DatabaseUser:
          fop.setFileOwner(user=DatabaseUser)
          fop.setFilePerm()
        elif DatabaseGroup:
          fop.setFileOwner(group=DatabaseGroup)
          fop.setFilePerm()
        else:
          fop.setFileOwner()
          fop.setFilePerm()

        logging.info('Successfully prepared new PGDATA directory: %s' % Directory)

        if PostgresConfig:
          if os.path.isfile(PostgresConfig):
            if PostgresPort:
              process_config(PostgresConfig,PostgresPort)
            else:
              process_config(PostgresConfig)
          else:
            logging.error('%s no such file' % PostgresConfig)
        else:
          if Directory.endswith("/"):
              PostgresConfig=Directory + 'postgresql.conf'
          else:
              PostgresConfig=Directory + '/postgresql.conf'
          if os.path.isfile(PostgresConfig):
            if PostgresPort:
                if Directory.endswith('/'):
                    process_config(config=PostgresConfig, logFileName=Directory.split('/')[-2], port=PostgresPort)
                else:
                    process_config(config=PostgresConfig, logFileName=Directory.split('/')[-1], port=PostgresPort)
            else:
                if Directory.endswith('/'):
                    process_config(config=PostgresConfig, logFileName=Directory.split('/')[-2])
                else:
                    process_config(config=PostgresConfig, logFileName=Directory.split('/')[-1])
          else:
            logging.error('%s no such file' % PostgresConfig)
      else:
        logging.error('Check archive file or directory have an absolute path')
        sys.exit(2)
    else:
      logging.error('Check archive file or directory exists')
      sys.exit(2)
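Assuming the script is saved as prepare_standby.py (a placeholder name), the options parsed above translate into an invocation along these lines:

# python prepare_standby.py -a /backups/base.tar -d /var/lib/postgresql/standby_data -u postgres -g postgres -p 5433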
예제 #42
0
class TemperatureReader:
	def __init__(self, tempReadFile, tempSaveDirectory, tempSaveFilename, interval):
		self.temperatureReadFile = tempReadFile
		self.temperatureSaveFile = os.path.join(tempSaveDirectory,tempSaveFilename)
		self.sampleInterval = interval
		self.isRecording = False
		self.recordingLoopActive = False
		self.threadsStarted = False
		self.nextTimer = None
		self.tempLogger = FileOperations(tempSaveDirectory, tempSaveFilename)
	
	#starts recording temperature at specified interval
	def startRecording(self):
		self.recordingLoopActive = True
		if(self.threadsStarted == False):
			threading.Timer(0, self.sampleTemperatureWithInterval, ()).start()
			self.threadsStarted = True
	
	#requests the recording thread to stop
	def stopRecording(self):
		self.recordingLoopActive = False
	
	#used to force the thread to stop recording if there was an error in recording data
	def resetIsRecording(self):
		self.isRecording = False
	
	#this method is called by timer threads to record data at the specified interval
	def sampleTemperatureWithInterval(self):
		#launching next timer thread to record temp after specified interval
		self.nextTimer = threading.Timer(self.sampleInterval, self.sampleTemperatureWithInterval, ())
		self.nextTimer.start()
		if(self.recordingLoopActive == True and self.storage.hasSpace()):
			self.isRecording = True
			#line below ensures that, even when there is an error recording temperature, isRecording won't stay on
			#The pi has 10 seconds to record temperature
			threading.Timer(10, self.resetIsRecording, ()).start()
			try:
				temperature = self.readTemperature()
				timestamp = TimeUtils.getTimestamp()
				output = "%s %s\n" % (timestamp, temperature)
				#adding temperature to temperature file
				self.tempLogger.appendToFile(output)
				self.logger.log("[TemperatureReader] Recorded temperature")
			except Exception as e:
				self.logger.logError("TemperatureReader", "Error reading temperature", e)
			self.isRecording = False
	
	#parses system file to get temperature in Celsius
	def readTemperature(self):
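		#the expected w1-therm output has two lines; the second ends in e.g. "t=23125", the temperature in thousandths of a degree, hence the division by 1000 below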
		with open(self.temperatureReadFile, 'r') as temperatureFile:
			text = temperatureFile.read()
			secondLine = text.split('\n')[1]
			temperatureData = secondLine.split(' ')[9]
			temperature = float(temperatureData[2:])
			return temperature/1000
	
	#cancels any timers that are waiting to execute. Used when quitting the program
	def quit(self):
		if self.nextTimer != None:
			self.nextTimer.cancel()
			
	def setLogger(self, logger):
		self.logger = logger
		
	def setStorage(self, storage):
		self.storage = storage
예제 #43
0
import json
from FileOperations import FileOperations
import nltk
from nltk.tag.stanford import StanfordPOSTagger
import os

# set the java environment variables:
# CLASSPATH is the path to the stanford-postagger.jar in your local disk
# STANFORD_MODELS is the path to the tagger file in your local disk
os.environ[
    'CLASSPATH'] = '/home/sol315/Downloads/stanford-postagger-2015-12-09/stanford-postagger.jar'
os.environ['STANFORD_MODELS'] = './models/english-left3words-distsim.tagger'

fo = FileOperations("../input.json")
fo.get_json()
st = StanfordPOSTagger('english-bidirectional-distsim.tagger')
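# st.tag() takes a list of tokens and returns (word, tag) pairs, which is why each review line is split on whitespace below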
f = open('taged.data', 'a')
cur = 0
for line in fo.reviews:
    cur += 1
    print cur, cur * 100 / fo.num_lines, '%'
    res = st.tag(line.split())
    json_tag = json.dumps(res)
    f.write(json_tag)
    f.write('\n')
예제 #44
0
import argparse
from FileOperations import FileOperations as FO
from Crawler import Crawler
import os.path

parser = argparse.ArgumentParser(description='Crawl file and execute regex rules on them')
parser.add_argument('-p', metavar='ParameterFilePath', type=argparse.FileType('r'), required=True,
                   help="path to a parameter json file. Parameter file should contain a 'crawling', 'rules' and 'result' key")
parser.add_argument('-o', metavar='OutputFilePath', type=argparse.FileType('w+'), help='output file. This argument is required if no output is specified in parameter file.\n The file must be either a .csv or .json')
parser.add_argument('-mt', metavar='Thread Numbers', type=int, help='use a multi-threaded crawler (1 thread per file) and specify the number of concurrent threads')
parser.add_argument('-s', metavar='StartDirectory', type=str, help='directory in which the crawling will start. This parameter is necessary if there is no "crawling" dictionary in the parameter file')

args = parser.parse_args()
if "p" not in args or args.p is None:
    parser.error(parser.format_usage())
param = FO.get_from_JSON_file(args.p.name)
if "rules" not in param or ("o" not in args and "output" not in param):
    print("rules error")
    parser.error(parser.format_usage())
if "crawling" not in param and ("s" not in args or args.s is None):
    parser.error(parser.format_usage())
elif "s" in args and args.s is not None:
    param["crawling"] = { "start": args.s}
if "o" in args and args.o is not None:
    output_name, output_extension = os.path.splitext(args.o.name)
    param["output"] = {
        "path": args.o.name,
        "type": "csv" if ".csv" in output_extension else "json"
    }
if "mt" in args and args.mt is not None:
    Crawler.crawl_multithread(param.get("crawling"), param.get("rules"), param.get("result"), param["output"], args.mt)
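With the arguments defined above, a run of this entry point might look like the following; the script and file names are placeholders, not taken from the original:

# python crawl.py -p parameters.json -o results.csv -mt 8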
예제 #45
0
    nameOut = (name + "_Constraints")
    DOPobject.separateChildContent(constraintsDF,
                                   "Constraint Type",
                                   ret=0,
                                   name=nameOut)

    nameOut = (name + "_Products")
    DOPobject.processSingleDataset(productDF, nameOut)


from FileOperations import FileOperations
expPath = root + config["DEFAULT"]["ExportPath"]

#f = FileOperations("E:/CUA OpenBank API/OpenBanking/DataProcesing")
f = FileOperations(root)
i = 0

#for sheet in [3,4,5,6]:
sheets = [
    'TRANS_AND_SAVINGS_ACCOUNTS', 'CRED_AND_CHRG_CARDS', 'TERM_DEPOSITS',
    'TERM_DEPOSITS_RATES'
]

for sheet in sheets:
    if sheet == 'TERM_DEPOSITS_RATES':
        rates = SP(dataFile, sheet)
        if not rates.df.iloc[:, 2:].empty:
            rates.log = log
            rates.path = exportPath + "/"
            rates.createDict()