def test_GetDueTodayTask(self):
    extractor = Extractor()
    result1 = extractor.extract("Apa saja deadline hari ini?", Context.getDueTodayTask)
    result2 = extractor.extract("Deadline tubes hari ini apa saja, ya?", Context.getDueTodayTask)
    result3 = extractor.extract("yang deadline pada hari ini", Context.getDueTodayTask)
    result4 = extractor.extract(
        "Bot, minta daftar deadline dong pada hari ini. Makasih :)", Context.getDueTodayTask)
    result5 = extractor.extract(
        "Untuk tucil, deadline pada hari ini apa saja?", Context.getDueTodayTask)
    result6 = extractor.extract(
        "Tubes yang deadline pada hari ini apa saja?", Context.getDueTodayTask)
    assert result1 is not None
    assert result1.jenisTask == ""
    assert result2 is not None
    assert result2.jenisTask == "tubes"
    assert result3 is not None
    assert result3.jenisTask == ""
    assert result4 is not None
    assert result4.jenisTask == ""
    assert result5 is not None
    assert result5.jenisTask == "tucil"
    assert result6 is not None
    assert result6.jenisTask == "tubes"
def process_all(self):
    try:
        print('process_all ... begin')
        extractor = Extractor(self.language)
        transformer = Transformer()
        extractor.connect()
        num_of_products = extractor.get_num_of_products()
        extractor.execute()
        num_of_rows = 10
        rows = extractor.get_next_batch(num_of_rows)
        pipeline = self.loader.create_pipeline()
        while len(rows) > 0:
            for product in rows:
                print('\n {}'.format(product))
                tf = transformer.get_tf(product)
                print('len tf: {}'.format(len(tf)))
                self.loader.insert_tf(product['id'], tf, pipeline)
            pipeline.execute()
            rows = extractor.get_next_batch(num_of_rows)
        extractor.close()
        self.loader.count_df()
        self.loader.count_tf_idf(num_of_products)
    except Exception as e:
        print('ETL.process_all(), error: {}'.format(e))
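# A minimal sketch (my assumption, not the project's real class) of the batch
# contract process_all() above relies on: execute() prepares the result set,
# and get_next_batch() hands out successive chunks until it returns an empty
# list, which terminates the while-loop.
class FakeBatchExtractor:
    def __init__(self, rows):
        self._rows = rows
        self._pos = 0

    def execute(self):
        # The real Extractor would run its SQL query here.
        self._pos = 0

    def get_next_batch(self, num_of_rows):
        batch = self._rows[self._pos:self._pos + num_of_rows]
        self._pos += num_of_rows
        return batch

# Usage, mirroring the loop shape in process_all():
fake = FakeBatchExtractor([{'id': i} for i in range(5)])
fake.execute()
rows = fake.get_next_batch(2)
while len(rows) > 0:
    for product in rows:
        print(product['id'])
    rows = fake.get_next_batch(2)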
def __init__(self, localDownloadQueue="PendingDownloadQueue"):
    Base.__init__(self)
    self.download_queue = localDownloadQueue
    self.ftp_sync = FileSyncer()
    self.move_file_into_processing()
    # Instantiated for their side effects; the instances are not kept.
    Extractor(self.local_directory_to_sync)
    Cleaner(self.local_directory_to_sync)
def test_attributes2(self):
    """Test the setting of attributes."""
    e = Extractor('testing')
    self.assertEqual(e.name, 'testing')
    # The JSON recorder exists and has the right type.
    self.assertTrue(e.JsonRecorder and isinstance(e.JsonRecorder, JsonRecorder))
    # extractor.JsonRecorder shares the extractor's name.
    self.assertEqual(e.JsonRecorder.name, 'testing')
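# A minimal Extractor sketch that would pass test_attributes2 above; this
# illustrates only the tested contract, the project's real classes certainly
# do more.
class JsonRecorder:
    def __init__(self, name):
        self.name = name

class Extractor:
    def __init__(self, name):
        self.name = name
        # The recorder is created eagerly and inherits the extractor's name,
        # which is exactly what the two assertions check.
        self.JsonRecorder = JsonRecorder(name)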
async def main(args):
    extractor = Extractor('patterns.json')
    if args.file:
        source = URLGenerator.source(args.file)
    else:
        source = ClipboardReader.source
    visited = set()
    if args.destination:
        folder = args.destination
    else:
        folder = 'images/'
    if not os.path.isdir(folder):
        return
    count = 1
    async for target in source():
        if not extractor.input.match(target):
            continue
        content, url = await Downloader.downloadContent(target)
        if url in visited:
            break
        visited.add(url)
        for img in extractor.resource.finditer(content):
            if not img.group():
                continue
            link = img.group()
            name = link.split('/')[-1] + '.png'
            await Downloader.downloadFile(link, os.path.join(folder, name))
            print('\033[K\033[A')
            print('Downloading ' + '.' * count, end='\r')
            count = (count + 1) % 5 + 1
    await waitTilFinish()
def __init__(self, password, image_folder_path):
    self.image_folder_path = image_folder_path
    self.mode = None
    self._gcm = None
    self._password = password
    self._data_length = 0
    self.extractor = Extractor(image_folder_path)
def __init__(self):
    # Init basic objects
    self.cropper = Cropper()
    self.extractor = Extractor(self.cropper)
    self.classifier = Classifier(self.extractor.images)
    self.connections = Connections(self.extractor, self.classifier)
    self.visualization = Visualization(self.connections)
def __init__(self, histogram):
    self.distribution = []
    self.generalization = []
    self.abstraction = []
    self.filter = Propagator()
    self.plotter = Estimator()
    self.detector = Extractor()
    self.plotter.setspace(histogram)
def OnExtract(self, events):
    text = self.sourcePage.GetValue().strip()
    keyword_result = ''
    result = ''
    if text != '':
        if self.languageType.GetSelection() == 1:
            sentences_percent = self.sentencesPercent.GetValue()
            similarity_function = self.similarityFunction.GetValue()
            print(similarity_function)
            extractor = Extractor(
                stop_words_file='./TextRank/trainer/stopword_zh.data')
            keyword, keyphrase = extractor.keyword_train(text=text)
            abstract = extractor.sentence_train(
                text,
                sentences_percent=sentences_percent,
                sim_func=similarity_function)
            keyword_result = '/'.join(keyword)
            keyword_result += '\n关键短语:\n' + '/'.join(keyphrase)
            result += '。'.join(abstract) + r'。'
            self.abstractPage.SetValue(result)
            # Set the text style.
            # f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True)  # create a font
            # self.abstractPage.SetStyle(0, len(result), wx.TextAttr('black', wx.NullColor, f))
            self.keywordPage.SetValue(keyword_result)
        else:
            art_type = self.articleType.GetSelection()
            extractor = EnExtractor(
                stop_words_file='./TextRank/trainer/stopword_en.data')
            if art_type == 1:
                keyphrase = extractor.keyphrase_train(
                    text, article_type='Abstract')
                keyword_result = 'Keyphrases:\n' + '/'.join(keyphrase)
            else:
                sentences_percent = self.sentencesPercent.GetValue()
                similarity_function = self.similarityFunction.GetValue()
                keyphrase = extractor.keyphrase_train(
                    text, article_type='Fulltext')
                summary = extractor.summary_train(
                    text,
                    sentences_percent=sentences_percent,
                    sim_func=similarity_function)
                keyword_result = '/'.join(keyphrase)
                result += ' ' + ' '.join(summary)
            self.abstractPage.SetValue(result)
            # Set the text style.
            f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True)  # create a font
            self.abstractPage.SetStyle(
                0, len(result), wx.TextAttr('black', wx.NullColor, f))
            self.keywordPage.SetValue(keyword_result)
    else:
        # test
        # sentences_percent = self.sentencesPercent.GetValue()
        # print(filter(lambda x: x.isdigit(), sentences_percent))
        print("No article")
def c_07():
    '''Get the number of products.'''
    try:
        extractor = Extractor(language='es')
        extractor.connect()
        n = extractor.get_num_of_products()
        print(n)
    except Exception as e:
        print('c_07(), error: {}'.format(e))
def __init__(self, trainDataPath=None, extractorOptions=[True, False, True], loadPath=None):
    if loadPath is not None:
        # Restore a previously pickled (classifier, extractor) pair.
        with open(loadPath, "rb") as f:
            self.gpc, self.ext = pickle.load(f)
    elif not (trainDataPath is None or extractorOptions == [True, False, True]):
        self.ext = Extractor(extractorOptions)
        data = self.ext.readTsv(trainDataPath)
        features = self.ext.features(data[0])
        labels = data[1]
        self.gpc = GaussianProcessClassifier().fit(features, labels)
    else:
        raise Exception("Either a path to a saved classifier or (dataset + extractor options) must be given")
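# A plausible counterpart to the pickle-loading branch above (an assumption,
# not code from the project): persisting the same (gpc, ext) tuple that
# __init__ unpickles.
def save(self, savePath):
    with open(savePath, "wb") as f:
        pickle.dump((self.gpc, self.ext), f)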
def extraer_rep(argumentos, pipe):
    url = argumentos['url']
    url = url.split('/')
    if url[2] != 'gitlab.com':
        pipe.send(400)
        return 1
    url = url[3] + '/' + url[4]  # namespace/project
    try:
        if 'token' in argumentos.keys():
            ext = Extractor(link=url, token=argumentos['token'])
        else:
            ext = Extractor(url)
        p = ext.extraer()
        Almacen.guardar(p)
        pipe.send(200)
    except Exception as e:
        ServidorLogica.log(str(e))
        if str(e) == 'Proyecto no encontrado':  # project not found
            pipe.send(404)
            return 1
        if str(e) == 'Permisos insuficientes':  # insufficient permissions
            pipe.send(401)
            return 1
        pipe.send(e)
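# Sketch of a caller driving extraer_rep through a multiprocessing Pipe. The
# Process/Pipe wiring and the name lanzar_extraccion are my assumptions; only
# the status codes (200/400/401/404) come from the function above.
from multiprocessing import Pipe, Process

def lanzar_extraccion(argumentos):
    parent_conn, child_conn = Pipe()
    worker = Process(target=extraer_rep, args=(argumentos, child_conn))
    worker.start()
    status = parent_conn.recv()  # 200 on success, otherwise an error code
    worker.join()
    return status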
def run(file_path):
    dis = Dissector()
    parser = Parser()
    extractor = Extractor()
    if os.path.isdir(file_path):
        dir_files_list = os.listdir(file_path)
        for files in dir_files_list:
            sample = Sample(os.path.join(file_path, files))
            dis.extract_file(sample)
            parser.parse(sample)
            extractor.extract(sample)
            sample.print_info()
    else:
        sample = Sample(file_path)
        dis.extract_file(sample)
        parser.parse(sample)
        extractor.extract(sample)
        sample.print_info()
def hello2():
    user_input = request.form["user-input"]
    print("\"{}\"".format(user_input))
    context_identifier = ContextIdentifier()
    context = context_identifier.getContext(user_input)
    bot_response = ""
    suggested_word = []
    if context == Context.unknown:
        print("Unknown?")
        suggested_word = SpellChecker().getWordSuggestion(user_input)
    elif context == Context.help:
        bot_response = "Terdapat beberapa hal yang dapat dilakukan:\n"
        bot_response += "- Menambah tugas (coba \"Tolong ingatkan kalau ada kuis IF3110 Bab 2 pada 22/04/21\")\n"
        bot_response += "- Melihat semua tugas (coba \"bot tugas apa saja sejauh ini ya?\")\n"
        bot_response += "- Melihat tugas pada periode tertentu (coba \"Apa saja deadline antara 03/04/2021 sampai 15/04/2021\")\n"
        bot_response += "- Melihat tugas beberapa hari/minggu ke depan (coba \"Ada tugas apa saja 2 hari ke depan\")\n"
        bot_response += "- Melihat tugas yang deadline-nya hari ini (coba \"Deadline tucil hari ini apa saja, ya?\")\n"
        bot_response += "- Menampilkan deadline dari suatu tugas tertentu (coba \"Deadline tucil IF2230 itu kapan?\")\n"
        bot_response += "- Memperbarui tugas (coba \"Deadline tucil IF2230 diundur menjadi 02/02/2021\")\n"
        bot_response += "- Menghapus/menyelesaikan tugas (coba \"bot ujian IF2230 sudah selesai ya jadi gausah disimpan lagi\")\n"
        bot_response += "Kata kunci:\n" + "\n".join(list(map(lambda x: "- " + x, ["kuis", "tubes", "tucil", "ujian"])))
    else:
        extractor = Extractor()
        print("\"{}\"".format(user_input))
        command = extractor.extract(user_input, context)
        if command is None:
            suggested_word = SpellChecker().getWordSuggestion(user_input)
        else:
            command.execute()
            bot_response = command.getResult()
    if bot_response == "":
        if len(suggested_word) > 0:
            bot_response = "Mungkin maksud kata kunci Anda: " + ", ".join(suggested_word)
        else:
            bot_response = "Saya tidak paham .-."
    chat_data.append((user_input, bot_response.split("\n")))
    return render_template("index.html", message_data=chat_data[(-5 if len(chat_data) >= 5 else 0):])
def process_sample(file_path):
    print(file_path)
    config = Config()
    load_success = config.load_config()
    if load_success:
        dis = Dissector()
        parser = Parser()
        extractor = Extractor()
        operator = Operator(config)
        r_generator = Report_Generator()
        sample = Sample(file_path)
        rlt = dis.extract_file(sample, config.get_output_dir())
        bin_time_list = list()
        if rlt:
            parser.parse(sample)
            extractor.extract(sample)
            # config.print_info()
            operator.operate(sample, config)
            r_generator.write_report(sample)
        return sample
def process_all(self):
    '''Extract all the rows SELECTed from the table (mySQL product_translation),
    Transform, and Load to Redis. This is used for Kill and Fill.'''
    try:
        extractor = Extractor()
        # transformer = Transformer(self.params)
        extractor.connect()
        num_of_rows = 20
        extractor.execute()
        rows = extractor.get_next_batch(num_of_rows)
        transformer = Transformer()
        courier = CourierClicoh()
        loader = LoaderCsv()
        while len(rows) > 0:
            products = []
            for row in rows:
                print('id : {}'.format(row['id']))
                j = courier.add_product(row)
                product = transformer.get_csv_row(j, row)
                # d = {
                #     'id': row['id'],
                #     'sku': row['sku'],
                #     'clicoh_id': "row['clicoh_id']",
                #     'clicoh_variant_id': "row['clicoh_variant_id']",
                # }
                products.append(product)
            loader.write_rows(products)
            rows = extractor.get_next_batch(num_of_rows)
        extractor.close()
        print('\n ETL.process_all() ... end')
    except Exception as e:
        print('ETL.process_all(), error: {}'.format(e))
        raise
def c_01():
    '''Get batches of rows from the mySQL database.'''
    try:
        extractor = Extractor()
        extractor.connect()
        extractor.execute()
        rows = extractor.get_next_batch(num_of_rows=2)
        while len(rows) > 0:
            print('\n ut_01.c_01(), looping BATCH of rows')
            for r in rows:
                print('\n {}'.format(r))
            rows = extractor.get_next_batch(num_of_rows=2)
        extractor.close()
    except Exception as e:
        print('ut_01.c_01(), error: {}'.format(e))
    print('\n end of case 1.')
def test_AddTask(self):
    extractor = Extractor()

    # Normal
    result1 = extractor.extract(
        "Halo bot, tolong ingetin kalau ada kuis IF3110 Bab 2 sampai 3 pada 22/04/21",
        Context.addTask)
    assert result1 is not None
    assert result1.matkul == "IF3110"
    assert result1.jenis == "kuis"
    assert result1.deskripsi == "Bab 2 sampai 3"
    assert result1.tahun == 2021
    assert result1.bulan == 4
    assert result1.tanggal == 22

    # Normal with a differently formatted date; UAS (final exam) is an "ujian"
    result2 = extractor.extract(
        "Ingatkan saya ada UAS IF2230 pada 20 Mei 2021. Saya sedang chaos nih. :(",
        Context.addTask)
    assert result2 is not None
    assert result2.matkul == "IF2230"
    assert result2.jenis == "ujian"
    assert result2.deskripsi == "UAS"
    assert result2.tahun == 2021
    assert result2.bulan == 5
    assert result2.tanggal == 20

    # The current year is implied; UTS (midterm exam) is an "ujian"
    result3 = extractor.extract(
        "Beritahukan saya tentang UTS IF2250 pada 1 Januari",
        Context.addTask)
    assert result3 is not None
    assert result3.matkul == "IF2250"
    assert result3.jenis == "ujian"
    assert result3.deskripsi == "UTS"
    assert result3.tahun == datetime.now().year
    assert result3.bulan == 1
    assert result3.tanggal == 1

    # The date is not introduced by the word "pada"
    result4 = extractor.extract(
        "saya ingin menambahkan tucil IF2220 tentang String Matching yang deadline-nya sudah dekat: 28 April",
        Context.addTask)
    assert result4 is not None
    assert result4.matkul == "IF2220"
    assert result4.jenis == "tucil"
    assert result4.deskripsi == "String Matching"
    assert result4.tahun == datetime.now().year
    assert result4.bulan == 4
    assert result4.tanggal == 28

    # No date (invalid)
    result7 = extractor.extract(
        "Ada tubes IF2210 tentang Worms. Ingatkan.", Context.addTask)
    assert result7 is None

    # No course code (invalid)
    result8 = extractor.extract(
        "Ada tucil tentang objek. Deadline 7 September. Ingatkan.", Context.addTask)
    assert result8 is None

    # No task type (invalid)
    result9 = extractor.extract(
        "Ingatkan tentang IF2211 tentang BFS dan DFS. Deadline 5 Desember.", Context.addTask)
    assert result9 is None
def extract(item):
    # NOTE: `max_len` and `thresh` are free variables here; they must be
    # bound in the enclosing scope (e.g. module globals) before this runs.
    date, text = item
    extractor = Extractor(text=text, max_len=max_len)
    words = extractor.extract_words(thresh=thresh)
    words['date'] = date
    return words, date
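# One way (an assumption about intent, not necessarily what this project does)
# to make those free variables explicit: bind max_len and thresh with
# functools.partial, which also keeps the callable usable with
# multiprocessing.Pool.map.
from functools import partial

def extract_with(item, max_len, thresh):
    date, text = item
    extractor = Extractor(text=text, max_len=max_len)
    words = extractor.extract_words(thresh=thresh)
    words['date'] = date
    return words, date

extract_bound = partial(extract_with, max_len=4, thresh=0.01)  # illustrative values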
print("confusion matrix:\nP\\R\tY\tN\nY\t{0}\t{1}\nN\t{2}\t{3}".format(tp,fp,fn,tn)) def showFeatures(self,dataPath=None,data=None): if not dataPath ==None: data=self.ext.readTsv(dataPath) features=self.ext.features(data[0],True) elif not data==None: features=self.ext.features(data,True) else: raise Exception("no data available") for i in features: print(i) if __name__=="__main__": ext=Extractor() if(input("Are you here to evaluate?\n>>> ").lower()=="y"): itemsList=[f for f in listdir("data/") if f[-4:]==".pkl"] for item in range(len(itemsList)): print("{0}: {1}".format(item,itemsList[item])) clfPath=int(input("Select your classifier PICKLE file\n>>> ")) clf=Classifier(loadPath="data/"+itemsList[clfPath]) dataList=[f for f in listdir("data/") if f[-4:]==".tsv"] for datum in range(len(dataList)): print("{0}: {1}".format(datum,dataList[datum])) dataPath=int(input("Select your evaluation data\n>>> ")) targetLabels=[l for l in input("target labels\n>>> ")] clf.evaluate("data/"+dataList[dataPath],targetLabels) _exit(0) opStr=input("Input your option string\nex)'fttt'\n>>> ") options=[]
    options['baseUrl'] = 'http://supervisor/core/api/states/'
except:
    logging.warning("Couldn't get token from environment; assuming this is dev")
    optionsFile = 'local.json'
    with open(optionsFile) as json_file:
        options.update(loadJson(json_file))

logging.info("Got {} for database".format(options['db_name']))
client = InfluxDBClient(host=options['db_ip'],
                        port=options['db_port'],
                        username=options['db_user'],
                        password=options['db_pass'])
extractor = Extractor(options['baseUrl'], options['sensorPrefix'],
                      options[TOKENKEY], options['Timezone'],
                      options['db_measurement_name'])
Einf = Einfluxer(client, options['db_name'])
message = ""
try:
    data = extractor.GetMeasurements()
except ValueError:
    message = ("Got ValueError when fetching data from Home Assistant; "
               "the sensor probably hasn't fetched data yet.")
if message == "":
    if not data[0]['tags']['Metering date'] == Einf.GetLatestMeterDate():
        message = "Inserted data for: {}".format(
            data[0]['tags']['Metering date'])
        try:
def __init__(self, prop):
    self.prop = prop
    self.extractor = Extractor(prop)
    self.transformer = Transformation(prop)
    self.loader = Loader(prop)
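# A sketch of how this extract/transform/load triple might be driven; the
# method names extract(), transform(), and load() are assumptions for
# illustration, not taken from the project's classes.
def process(self):
    data = self.extractor.extract()
    transformed = self.transformer.transform(data)
    self.loader.load(transformed)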
                 X_test,
                 y_test,
                 file,
                 generation=20,
                 scale=20,
                 conjunction=False,
                 maxsat_on=True,
                 tailor=False,
                 fitness_func='Pro')
param = m.pso()
phi = param[0]
theta = param[1]
psi = param[2]
k = param[3]

ex = Extractor(clf, phi, theta, psi)
ex.extract_forest_paths()
ex.rule_filter()
print('max_rule', ex.max_rule, 'max_node', ex.max_node)
print("original path number: ", ex.n_original_leaves_num)
print('original scale: ', ex.scale)
print("original path number after rule filter: ", len(ex._forest_values))

sat = Z3Process(ex, k)
sat.leaves_partition()
sat.maxsat()
sat.run_filter()
print("original path number after maxsat: ", sat.n_rules_after_max,
      " after filter: ", sat.n_rules_after_filter, '\n')
print('classes:', clf.classes_)
from Extractor import Extractor

inputfiles = [
    "sample1.rtf", "sample2.rtf", "sample3.rtf",
    "sample4.rtf", "sample5.rtf", "sample6.rtf"
]

result = Extractor(inputfiles)
ans = result.getalldiagnosis()

# Flatten the per-file diagnosis lists into a single list.
new = []
for i, j in ans.items():
    new += j
for i in new:
    print(i)
from InfoToTextFile import RecordEntry

excelFile = "FinalResult.xlsx"
mailingListFile = "Mailing/MailingList.csv"

# list of emails, to be written to a text file
mail_list = []

# now to action!
# create record writer instance
recWriter = RecordEntry()

# extract mail
extractor = Extractor(workBookPath=excelFile)
mail_list = extractor.extractAllEmailAddress()

# write to file
recWriter.writeRecords(text_file=mailingListFile, records=mail_list)

# get team records by name and write mail lists or full info
# writing mailing lists here
teamNames = [
    "Corporate", "Operations", "Publications", "Promotions", "Logistics"
]
stats = {}
for teamName in teamNames:
    recordsFile = "Mailing/{}.csv".format(teamName)
    records = extractor.extractRecordByTeam(teamName=teamName)
def main(args: str) -> None:
    url: str = __check_error_input(args)
    print("Extracting subjects...")
    Extractor(WebScraping(url).extract_subjects()).extract_data_frame().to_csv(
        "./Dati/subjects.csv", index=False)
    print("DONE!\n")
from Extractor import Extractor
from Granulator import Granulator
from Agent import Agent
from Metric import Metric
from Representative import Representative
from Clustering_MBSAS import Clustering_MBSAS
from Clustering_K_Means import Clustering_K_Means

extractor1 = Extractor()

# Lambda, theta_start, theta_step, theta_stop
obj_clustering_MBSAS = Clustering_MBSAS(3, 0.2, 0.1, 1.1)
agent1 = Agent(Granulator, Metric, extractor1, Representative, obj_clustering_MBSAS)
agent1.execute(3.1, 0.5)  # S_T, eta

obj_clustering_K_Means = Clustering_K_Means(1, 3)  # k, k_max
agent2 = Agent(Granulator, Metric, extractor1, Representative, obj_clustering_K_Means)
agent2.execute(3.1, 0.5)  # S_T, eta
def __init__(self, path, viewer=None, green_screen=False):
    """
    Extract information about the pieces in the image at `path`
    and start computing the solution.
    """
    green_screen = True  # NOTE: unconditionally overrides the parameter
    self.pieces_ = None
    factor = 0.40
    # Retry extraction with a growing factor until pieces are found.
    while self.pieces_ is None:
        factor += 0.01
        self.extract = Extractor(path, viewer, green_screen, factor)
        self.pieces_ = self.extract.extract()
    self.viewer = viewer
    self.green_ = green_screen
    self.connected_directions = []
    self.diff = {}
    self.edge_to_piece = {}
    for p in self.pieces_:
        for e in p.edges_:
            self.edge_to_piece[e] = p
    self.extremum = (-1, -1, 1, 1)

    self.log('>>> START solving puzzle')
    border_pieces = []
    non_border_pieces = []
    connected_pieces = []
    # Separate border pieces from the others
    for piece in self.pieces_:
        if piece.number_of_border():
            border_pieces.append(piece)
        else:
            non_border_pieces.append(piece)
    self.possible_dim = self.compute_possible_size(len(self.pieces_),
                                                   len(border_pieces))
    # Start with a corner piece
    for piece in border_pieces:
        if piece.number_of_border() > 1:
            connected_pieces = [piece]
            border_pieces.remove(piece)
            break
    self.log("Number of border pieces: ", len(border_pieces) + 1)
    self.export_pieces('/tmp/stick{0:03d}'.format(1) + ".png",
                       '/tmp/colored{0:03d}'.format(1) + ".png",
                       'Border types',
                       'Step {0:03d}'.format(1),
                       display_border=True)

    self.log('>>> START solve border')
    start_piece = connected_pieces[0]
    self.corner_pos = [((0, 0), start_piece)]  # we start with a corner
    # Rotate the corner until its border edges face south and west.
    for i in range(4):
        if start_piece.edge_in_direction(Directions.S).connected and \
                start_piece.edge_in_direction(Directions.W).connected:
            break
        start_piece.rotate_edges(1)
    self.extremum = (0, 0, 1, 1)
    self.strategy = Strategy.BORDER
    connected_pieces = self.solve(connected_pieces, border_pieces)

    self.log('>>> START solve middle')
    self.strategy = Strategy.FILL
    self.solve(connected_pieces, non_border_pieces)

    self.log('>>> SAVING result...')
    self.translate_puzzle()
    self.export_pieces("/tmp/stick.png", "./images/output/solved.png", display=True)
                    required=False,
                    default=False,
                    type=bool,
                    dest='preprocess')

if __name__ == '__main__':
    tic = time()
    args = parser.parse_args()
    rfpath = join(RFDIR, args.fname)
    print(args.preprocess, args.count)
    if not args.preprocess:
        try:
            text = open(rfpath, "r").readlines()
        except:
            text = open(rfpath, "r", encoding="utf-8").readlines()
        text = [line.strip() for line in text]
        extractor = Extractor(text=text, max_len=args.ngram)
    else:
        extractor = Extractor(rfpath=rfpath, max_len=args.ngram)
    words = extractor.extract_words(score_thresh=args.thresh, cnt_thresh=args.count)
    if args.save:
        if args.oname:
            opath = join(WFDIR, args.oname)
        else:
            opath = join(WFDIR, args.fname)
        words.to_csv(opath, encoding="utf_8_sig", index=False, sep='\t')
    print(words)
    toc = time()
    print("Total time: %.2fs" % (toc - tic))
from Crawler import Crawler
from Extractor import Extractor
from Loader import Loader
import sys

url = sys.argv[1]
csv_file = sys.argv[2]
data_base = sys.argv[3]

cr = Crawler(url)
rates = cr.get_response()

ex = Extractor(rates, csv_file)
ex.extraction()

# Load the CSV written by the extractor into the database.
ld = Loader(csv_file, data_base)
ld.save_to_db()
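# Illustrative invocation (the script name and argument values are
# hypothetical; the positional order comes from the sys.argv reads above):
#   python run_etl.py https://example.com/rates rates.csv rates.db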