def main():
    args = parser.parse_args()
    modify_arguments(args)

    # setting random seeds
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    with open(args.config_file, 'r') as stream:
        config = yaml.load(stream)
    args.config = Munch(modify_config(args, config))
    logger.info(args)

    if args.mode == 'train':
        train.train(args, device)
    elif args.mode == 'test':
        pass
    elif args.mode == 'analysis':
        analysis.analyze(args, device)
    elif args.mode == 'generate':
        pass
    elif args.mode == 'classify':
        analysis.classify(args, device)
    elif args.mode == 'classify_coqa':
        analysis.classify_coqa(args, device)
    elif args.mode == 'classify_final':
        analysis.classify_final(args, device)
def main():
    init()
    # https://www.scrapehero.com/how-to-prevent-getting-blacklisted-while-scraping/
    # https://www.nasdaq.com/symbol/aapl/revenue-eps
    date_range_pull('2019-Aug-31', '2019-Sep-18')
    # daily_pull()
    analysis.analyze(config)
def main():
    # Small
    SETTINGS = {
        "grid_size": 100,
        "min_block_size": 1,
        "dot_skip_rate": 1,
        "dotsize": 0.1,
        "fontsize": 10,
        "figsize": (10, 7),
        "min_event_size": 3,
        "lines_join_size": 5,
        "line_min_size": 10
    }

    # for foldername in os.listdir(mkpath(ROOT_PATH, "output", "analysis", "small")):
    #     analysis.analyze(
    #         name=foldername,
    #         query_genome_path="samples/small/source.fasta",
    #         ref_genome_path="samples/small/{}.fasta".format(foldername),
    #         segments_file_path="BWA/small/{}/bwa_output.sam".format(foldername),
    #         show_plot=False,
    #         output_folder="output/analysis/small/{}".format(foldername),
    #         settings=SETTINGS.copy()
    #     )

    # Large
    SETTINGS = {
        "grid_size": int(1e5),
        "min_block_size": int(1e3),
        "dot_skip_rate": 10,
        "dotsize": 0.1,
        "fontsize": 8,
        "figsize": (10, 7),
        "min_event_size": int(1e4),
        "lines_join_size": "$min_event_size + 3",
        "line_min_size": "$min_event_size"
    }

    for foldername in os.listdir(mkpath(ROOT_PATH, "output", "analysis")):
        if not os.path.isdir(mkpath(ROOT_PATH, "output", "analysis", foldername)) or \
                foldername.strip("/").strip("\\") == "small":
            continue
        if int(foldername.strip("/").strip("\\").lstrip("large")) > 6:
            continue

        analysis.analyze(
            name=foldername,
            query_genome_path="samples/{}/large_genome1.fasta".format(foldername),
            ref_genome_path="samples/{}/large_genome2.fasta".format(foldername),
            # segments_file_path="BWA/{}/bwa_output.sam".format(foldername),
            segments_file_path="{}".format(analysis.segment_paths[foldername]),
            show_plot=False,
            output_folder="output/analysis/{}".format(foldername),
            settings=SETTINGS.copy()
        )
def run(self):
    global data_analyzed

    print("Starting playback thread " + str(self.thread_id) + "...")
    play(self.q, self.thread_id)
    print("Playback thread " + str(self.thread_id) + " done!")

    if data_analyzed == 0:
        analysis.analyze()
        data_analyzed = 2
        print("Timing data analyzed by thread " + str(self.thread_id))
    else:
        data_analyzed = data_analyzed - 1
def main():
    SETTINGS = {
        "grid_size": int(1e5),
        "min_block_size": int(1e3),
        "dot_skip_rate": 10,
        "dotsize": 0.1,
        "fontsize": 8,
        "figsize": (10, 7),
        "min_event_size": int(1e4),
        "lines_join_size": "$min_event_size + 3",
        "line_min_size": "$min_event_size"
    }

    # for foldername in os.listdir(mkpath(ROOT_PATH, "output", "analysis")):
    #     if not os.path.isdir(mkpath(ROOT_PATH, "output", "analysis", foldername)) or foldername.strip("/").strip("\\") == "small":
    #         continue
    #     if int(foldername.strip("/").strip("\\").lstrip("large")) > 7:
    #         continue
    #     analysis.analyze(
    #         name=foldername,
    #         query_genome_path="samples/{}/large_genome1.fasta".format(foldername),
    #         ref_genome_path="samples/{}/large_genome2.fasta".format(foldername),
    #         # segments_file_path="BWA/{}/bwa_output.sam".format(foldername),
    #         segments_file_path="{}".format(analysis.segment_paths[foldername]),
    #         show_plot=False,
    #         output_folder="output/analysis/{}".format(foldername),
    #         settings=SETTINGS.copy()
    #     )

    for pair_name in os.listdir(mkpath(ROOT_PATH, "output", "alignment_grouptest")):
        first_genome_name, second_genome_name, _ = pair_name.split(".fasta_")
        # print(first_genome_name, second_genome_name)

        first_genome_path = mkpath("samples", "grouptest", first_genome_name + ".fasta")
        second_genome_path = mkpath("samples", "grouptest", second_genome_name + ".fasta")

        analysis.analyze(
            name=pair_name,
            query_genome_path=first_genome_path,
            ref_genome_path=second_genome_path,
            segments_file_path=mkpath("output", "alignment_grouptest", pair_name),
            show_plot=False,
            output_folder=mkpath("output", "analysis_grouptest", pair_name),
            settings=SETTINGS.copy()
        )
def main():
    # service = getOrCreateService()
    if not path.exists(csvFolder):
        # Query files from drive
        # https://drive.google.com/drive/u/0/folders/16SQCIy97DRDsRAhpKBilbMh8hlazQAhZ
        driveFolderId = '16SQCIy97DRDsRAhpKBilbMh8hlazQAhZ'
        files = getFilesInFolder(driveFolderId, mimeType='application/vnd.google-apps.spreadsheet')
        # export files
        exportFiles(files)

    # do the analysis
    analyze(csvFolder)
def run(phonemes, words):
    """Driver to call parsing and comparing functions."""
    phoneme1, phoneme2 = parse_phoneme(phonemes)
    # print(phoneme1)
    # print(phoneme2)
    # print(words)

    # print('Target phonemes: [' + phoneme1 + '], [' + phoneme2 + ']')
    # print('Data set: ')
    # print(words)

    # for each word in words, return word-trans pair
    # wt_pairs = [Word(w) for w in words]
    # for w in wt_pairs: print(w)
    transcribed_words = [Word(w) for w in words]

    analyze(phoneme1, phoneme2, transcribed_words)
def upload_file():
    if request.method == 'POST':
        saved_files = {}
        for file_name in ['buildings_file', 'central_file', 'schedule_file']:
            # check if the post request has the file part
            if file_name not in request.files:
                flash('No ' + file_name + ' part')
                return redirect(request.url)
            file = request.files[file_name]
            # if the user does not select a file, the browser may also
            # submit an empty part without a filename
            if file.filename == '':
                flash('No selected file')
                return redirect(request.url)
            if file and allowed_file(file.filename):
                filename = secure_filename(file.filename)
                file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                saved_files[file_name] = file_path
                file.save(file_path)

        intervals_dec, buildings, buildings_seat, building_codes = \
            analyze(saved_files['buildings_file'],
                    saved_files['central_file'],
                    saved_files['schedule_file'])
        dict_buildings = {code: building.asdict() for code, building in buildings.items()}
        return render_template('filter.html',
                               intervals_dec=intervals_dec,
                               buildings=dict_buildings,
                               building_codes=building_codes)
    return render_template('submit_files.html')
def setup(trackid):
    global dd_snds, dd_tabs, dd_seq, dd_amp, beat_space, a, rm, patch_rhythm, patch_harmony, d_seq, d_func

    # play beat click
    dd_snds = ['../wav/alum3.wav']
    dd_tabs = SndTable(dd_snds)
    dd_seq = Seq(time=1, seq=[0], onlyonce=True)
    dd_amp = TrigEnv(dd_seq, dd_tabs, dur=.25, mul=.25).out()

    a = an.analyze(trackid)
    a = an.pick_events(a, [['action', ['start', 'stop']],
                           ['type', ['beats', 'sections', 'segments']]])
    # Seq seems to ignore the last value in its seq list - needed to add this
    # dummy 0 at the end to get all beats
    beat_space = list(np.diff(np.array([e['time'] for e in a]))) + [0.0]

    # channel 1 is rhythm
    # channel 2 is harmony
    s.programout(patch_rhythm, 1)
    an.show(an.GM_patches[patch_rhythm])
    s.programout(patch_harmony, 2)
    an.show(an.GM_patches[patch_harmony])
    an.show(str(len(beat_space)) + ' beats')

    d_seq = Seq(time=1, seq=beat_space, onlyonce=True)
    d_func = TrigFunc(d_seq, seq_callback, 'sequence')
    rm = RawMidi(seq_callback)
def search(self, query, search_type='AND', rank=False):
    """
    Search; this will return documents that contain words from the query,
    and rank them if requested (sets are fast, but unordered).

    Parameters:
      - query: the query string
      - search_type: ('AND', 'OR') do all query terms have to match, or just one
      - rank: (True, False) if True, rank results based on TF-IDF score
    """
    if search_type not in ('AND', 'OR'):
        return []

    analyzed_query = analyze(query)
    # print(analyzed_query)
    results = self._results(analyzed_query)
    if search_type == 'AND':
        # all tokens must be in the document
        documents = [self.documents[doc_id] for doc_id in set.intersection(*results)]
    if search_type == 'OR':
        # only one token has to be in the document
        documents = [self.documents[doc_id] for doc_id in set.union(*results)]

    if rank:
        return self.rank(analyzed_query, documents)
    ids = [document.ID for document in documents]
    # return documents
    return ids
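The AND/OR logic above combines the per-token posting sets with set.intersection or set.union. The following self-contained sketch illustrates the same technique on a toy in-memory index; toy_analyze, the sample documents, and toy_search are illustrative stand-ins and not part of the original class.

def toy_analyze(text):
    # trivial tokenizer standing in for the project's analyze()
    return text.lower().split()

documents = {1: "the quick brown fox", 2: "the lazy dog", 3: "quick dog"}
index = {}
for doc_id, fulltext in documents.items():
    for token in toy_analyze(fulltext):
        # inverted index: token -> set of document ids
        index.setdefault(token, set()).add(doc_id)

def toy_search(query, search_type='AND'):
    results = [index.get(token, set()) for token in toy_analyze(query)]
    if not results:
        return []
    combine = set.intersection if search_type == 'AND' else set.union
    return sorted(combine(*results))

print(toy_search("quick dog", "AND"))  # [3]
print(toy_search("quick dog", "OR"))   # [1, 2, 3]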
async def handle_echo(reader, writer):
    data = await reader.read(BUFMAX)
    message = data.decode()
    analysis.analyze()

    # addr = writer.get_extra_info('peername')
    # print(f"Received {message!r} from {addr!r}")

    # print(f"Send: {message!r}")
    # with open('data2.csv', 'wb') as f:
    #     f.write(data)
    # writer.write(data)
    # await writer.drain()

    print("Close the connection")
    writer.close()
def analyze(bot, update, args):
    """
    Analyzes the sentiment of a single input message or of the last 10
    messages of the user who called this function. By sending 'me' a user
    can see their average sentiment score over their last 10 messages.
    """
    global client
    message = ' '.join(args)
    if (len(message.split()) == 1 and message.lower() == 'me'):
        user = update.message.from_user
        user_id = user.id
        chat_id = update.message.chat_id
        if chat_id in groups.keys():
            cur_group = groups[chat_id]
            if user_id in cur_group.keys():
                messages = cur_group[user_id]
                sentiment = analysis.get_average_sentiment(messages, client)
                avg_score = "%.4f" % sentiment.avg_score
                message = "{} has had an average sentiment score of {} for their last 10 messages"\
                    .format(user.first_name, avg_score)
                bot.send_message(chat_id=update.message.chat_id, text=message)
            else:
                message = "I don't have any messages stored from {} yet"\
                    .format(user.first_name)
                bot.send_message(chat_id=update.message.chat_id, text=message)
        else:
            message = "I don't have any messages stored from this chat yet"
            bot.send_message(chat_id=update.message.chat_id, text=message)
    else:
        score = "%.4f" % analysis.analyze(message, client).score
        message = "Your message of '" + message + "' yielded a score of: " + score
        bot.send_message(chat_id=update.message.chat_id, text=message)
def readm0():
    bar = progressbar.ProgressBar()
    data = tools.loaddir("/home/aditya/Desktop/project/aclImdb/train/pos/")
    data.extend(tools.loaddir("/home/aditya/Desktop/project/aclImdb/train/neg/"))
    regexp = analysis._re_analysis()
    db = memory.recollect()
    return [analysis.analyze(open(w[0]).read(), db=db, regexp=regexp) for w in bar(data)]
def deh120():
    saved_files = {}
    for file_name in [('buildings_file', 'building_abbreviations.csv'),
                      ('central_file', 'centrally_scheduled_classrooms.csv'),
                      ('schedule_file', 'ClassSchedule-23_comma.csv')]:
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], file_name[1])
        saved_files[file_name[0]] = file_path

    intervals_dec, buildings, building_codes = \
        analyze(saved_files['buildings_file'],
                saved_files['central_file'],
                saved_files['schedule_file'])
    dict_buildings = {code: building.asdict() for code, building in buildings.items()}
    deh120 = dict_buildings['DEH']['rooms']['120']['occupancy_matrix']

    intervals_dec = get_intervals_dec(800, 1600, 30)
    days = DAYS
    open_times = []
    for day_i, day in enumerate(deh120):
        for int_i, occupied in enumerate(day):
            if occupied == 0:
                open_event = {
                    'title': 'DEH - 120 Open',
                    'start': intervals_dec[int_i],
                    'end': intervals_dec[int_i + 1] if int_i + 1 < len(intervals_dec) else intervals_dec[int_i] + 30,
                    'dow': days[day_i],
                    'location': 'DEH - 120',
                }
                open_times.append(open_event)
    return str(open_times)
def process(img):
    print('<>' * 10, 'Beginning of Process', '<>' * 10)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgs = pupil_finder.find(img)
    data = []
    sum_result = 0

    if (not imgs or len(imgs) != 4):
        print('Pupil not found')
        return {'error': 'Pupila não encontrada!'}

    pieces = normalize.four_pieces(imgs)
    for i in range(0, len(pieces)):
        result, piece = analysis.analyze(pieces[i])
        retval, buffer = cv2.imencode('.jpg', piece)
        base64_bytes = base64.b64encode(buffer)
        base64_string = base64_bytes.decode('utf-8')
        sum_result += result
        data.append({'result': round(result, 2), 'img': base64_string})

    avarage = sum_result / 4
    return {'images': data, 'avarage': round(avarage, 2)}

# img = cv2.imread('images/without_diabetes/5d.png')
# img = cv2.imread('images/with_diabetes/5esim.png')
# img = cv2.imread('images/with_diabetes/4dsim.png')
# img = cv2.imread('images/without_diabetes_with_background/3e.png')

# cv2.imshow('img', img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()

# result = process(img)
# print('result: ', result)
def main(year, inform):
    df = open_file('../input/suicides.csv')
    data_clean = cleaning(df)
    data_imported = impor(data_clean, "https://restcountries.eu/rest/v2/name/")
    data_filtered = filtering(data_imported, year)
    path, path2, path3, path4 = analyze(data_filtered, year)
    file_to_send = pdf(path, path2, path3, path4, 'Helvetica', year, inform)
    emailing(file_to_send, year)
def meow(days=None, dates=None, limit=None, short=None, fail=True,
         exclude=None, job_type=None, down_path=config.DOWNLOAD_PATH,
         periodic=False):
    """
    This function actually runs the whole workflow; you can import it anywhere
    and run it with these parameters:

    :param days: how many days of history to take, usually 7 (a week) is enough
    :param dates: specific dates in format ["%m-%d", ..]: ['04-15', '05-02']
    :param limit: limit the overall amount of jobs to analyze
    :param short: analyze only this type of job, accepts a short name:
        "ha", "upgrades", "nonha"
    :param fail: whether to analyze and print only failed jobs (True by default)
    :param exclude: exclude a specific job type: "gate-tripleo-ci-f22-containers"
    :param job_type: include only this job type (like short, but accepts the
        full name): "gate-tripleo-ci-f22-nonha"
    :param down_path: path on the local system to save all job files to
    :param periodic: whether to take periodic jobs (periodic=True) or patches (False)
    :return: parsed jobs data, ready for printing to HTML or console
    """
    if not periodic:
        g = Gerrit(period=days)
        #gerrit = g.get_project_patches(config.PROJECTS)
        # Dump gerrit data for investigation
        #with open("/tmp/gerrit", "w") as f:
        #    f.write(json.dumps(gerrit))
        # If debug mode
        with open("/tmp/gerrit", "r") as f:
            gerrit = json.loads(f.read())
        jobs = (job for patch in gerrit for job in Patch(patch).jobs)
    else:
        jobs = (job for url in config.PERIODIC_URLS
                for job in Periodic(url, down_path=down_path, limit=limit).jobs)

    f = Filter(
        jobs,
        days=days,
        dates=dates,
        limit=limit,
        short=short,
        fail=fail,
        exclude=exclude,
        job_type=job_type,
        periodic=periodic
    )
    filtered = f.run()
    ready = []
    for job in filtered:
        ready.append(analyze(job, down_path=down_path))
    return ready
def index_document(self, document):
    if document.ID not in self.documents:
        self.documents[document.ID] = document
        document.analyze()

    for token in analyze(document.fulltext):
        if token not in self.index:
            self.index[token] = set()
        self.index[token].add(document.ID)
def run_streaming_simulation(ge_args, m_args, trace_dir, csv_path):
    """ Runs simulation of using MiDAS """
    print("Running simulation with MiDAS...")
    recv_ge = network_model.GEModel(ge_args)
    box = network_model.NetworkBox(recv_ge, m_args.latency,
                                   model_constants.RATE_5Mbps)
    midas = midas_streaming_model.MiDAS(m_args, box)
    if not midas.valid_rate():
        print("not a valid set of parameters, skipping")
        return
    print("Parameters are N = {}, B = {}, T = {}".format(
        midas.N, midas.B, midas.T))

    # generate stream of frames to use with 1080p 60fps for 5 min
    stream_gen = streams.FixedSizeStream(model_constants.RES_1080P,
                                         model_constants.FPS_60)
    trace_path = "cbr2500.txt"
    # for trace_path in os.listdir(trace_dir):
    #     if ".swp" in trace_path:
    #         continue
    frames = stream_gen.from_trace(
        os.path.abspath(trace_dir + "/" + trace_path))

    # initialize GE models and network box
    midas.box.recv_ge_model = network_model.GEModel(ge_args)

    # use streaming
    print("Using {} frames with size {}...".format(len(frames), frames[0].size))
    metrics, loss = midas.transmit_source_blocks(frames)

    # average metrics
    analysis.analyze(metrics, len(frames) * midas.k, "MiDAS", midas, csv_path,
                     latency=midas.box.latency, total_lost=loss)
def mainline(train=False):
    datadir = DIR['BASE'] + "data/"
    if train is True:
        featurefile = datadir + 'train-features.txt'
        xmldir = DIR['BASE'] + "demo/train/"
    else:
        featurefile = datadir + 'test-features.txt'
        xmldir = DIR['BASE'] + "demo/test/"
    deleteFiles([featurefile])
    #infile = xmldir + 'C08-1122-parscit-section.xml'
    client_socket = getConnection()
    for infile in glob(xmldir + "*.xml"):
        try:
            print infile + " is being processed."
            if train is True:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            print "Some Exception in the main pipeline"
            print (str(type(e)))
            print str(e)
            logging.exception("Something awfull !!")
    model = DIR['DATA'] + "sec-tfidf-model.txt"
    if train is False:
        # Testing
        outfile = DIR['DATA'] + "sec-tfidf-test-out.txt"
        for gamma in [1.0]:
            predictSvm(featurefile, model + str(gamma), outfile)
            outstring = "Testing. Weight : " + str(gamma)
            analyze(featurefile, outfile, outstring)
        #pickleIt()
    else:
        # Training
        outfile = DIR['DATA'] + "sec-tfidf-train-out.txt"
        deleteFiles([outfile])
        for gamma in [1.0]:
            #trainSvm(featurefile, model + str(gamma), gamma)
            trainSvm(featurefile, model, gamma)
            predictSvm(featurefile, model, outfile)
            outstring = "Training. gamma : " + str(gamma)
            analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
def main(): print("archivo?") archivo = input() input_file = open("./inputs/" + archivo) data = input_file.read() input_file.close() name, characters, keywords, tokens = decomp.main(data) final_dfa, dfas = analysis.analyze(name, characters, keywords, tokens) to_file.create(final_dfa, dfas, name) print("Hecho archivo de nombre ", name)
def compile_to_ctree(pysource):
    sys.setrecursionlimit(100000)
    t0 = time.time()
    print 'Generating ctree...'
    raw_ast = parse(pysource)
    annotated_ast = analysis.analyze(raw_ast)
    c_ast = ctree.transform_to_ctree(annotated_ast)
    print 'Finished generating ctree'
    print '[elapsed time: %.2f seconds]' % (time.time() - t0)
    return c_ast
def main(): #print "Pandas Version", pd.__version__ symbol_file = sys.argv[1] startdate = sys.argv[2] enddate = sys.argv[3] starting_equity = sys.argv[4] benchmark = sys.argv[5] ''' benchmark = sys.argv[5] entry_strategy = sys.argv[6] exit_strategy = sys.argv[7] entry_filter = sys.argv[8] exit_filter = sys.argv[9] pos_size_strategy = sys.argv[10] ''' # Get Market data from Yahoo files #d_data, ls_symbols = marketdata.get_data(startdate, enddate,symbol_file,benchmark) #df_prices = d_data['close'] # Get Market data from SQLite database (previously loaded from Yahoo df_prices, ls_symbols = marketdata.get_sqlitedb_data(startdate, enddate, symbol_file, benchmark, 'Close') #df_sma = indicators.sma(df_prices,50) #df_uch, df_lch = indicators.channel(df_prices,50) #analysis.plot(df_uch.index,df_prices['AAPL'],df_uch['AAPL'],df_lch['AAPL'],df_sma['AAPL']) # Find Events and create Event profile df_bb_events = events.find_bb_events(ls_symbols, df_prices, benchmark) #Generate an Event Profile simulator.marketsim(100000, 'mydata.csv', 'portval.csv',df_prices) #Analyze the simulation Results analysis.analyze('portval.csv')
def mainline(train=False): datadir = DIR["BASE"] + "data/" if train is True: featurefile = datadir + "train-features.txt" xmldir = DIR["BASE"] + "demo/train/" else: featurefile = datadir + "test-features.txt" xmldir = DIR["BASE"] + "demo/test/" deleteFiles([featurefile]) # infile = xmldir + 'C08-1122-parscit-section.xml' client_socket = getConnection() for infile in glob(xmldir + "*.xml"): try: print infile + " is being processed." if train is True: generateTrainFeatures(client_socket, infile, featurefile) else: generateTestFeatures(client_socket, infile, featurefile) except Exception as e: print "Some Exception in the main pipeline" print (str(type(e))) print str(e) logging.exception("Something awfull !!") model = DIR["DATA"] + "sec-tfidf-model.txt" if train is False: # TESTING outfile = DIR["DATA"] + "sec-tfidf-test-out.txt" predictSvm(featurefile, model, outfile) extractValues(outfile) outstring = "Default values Test results" analyze(featurefile, outfile, outstring=outstring) pickleIt() else: # TRAINING trainSvm(featurefile, model) outfile = DIR["DATA"] + "sec-tfidf-train-out.txt" predictSvm(featurefile, model, outfile) outstring = "Default values" analyze(featurefile, outfile, outstring=outstring)
def mainline(train=False):
    datadir = DIR['BASE'] + "data/"
    if train is True:
        featurefile = datadir + 'train-features.txt'
        xmldir = DIR['BASE'] + "demo/train/"
    else:
        featurefile = datadir + 'test-features.txt'
        xmldir = DIR['BASE'] + "demo/test/"
    deleteFiles([featurefile])
    #infile = xmldir + 'C08-1122-parscit-section.xml'
    client_socket = getConnection()
    for infile in glob(xmldir + "*.xml"):
        try:
            print infile + " is being processed."
            if train is True:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            print "Some Exception in the main pipeline"
            print(str(type(e)))
            print str(e)
            logging.exception("Something awfull !!")
    model = DIR['DATA'] + "sec-tfidf-model.txt"
    if train is False:
        # TESTING
        outfile = DIR['DATA'] + "sec-tfidf-test-out.txt"
        predictSvm(featurefile, model, outfile)
        extractValues(outfile)
        outstring = "Default values Test results"
        analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
    else:
        # TRAINING
        trainSvm(featurefile, model)
        outfile = DIR['DATA'] + "sec-tfidf-train-out.txt"
        predictSvm(featurefile, model, outfile)
        outstring = "Default values"
        analyze(featurefile, outfile, outstring=outstring)
def filter():
    saved_files = {}
    for file_name in [('buildings_file', 'building_abbreviations.csv'),
                      ('central_file', 'centrally_scheduled_classrooms.csv'),
                      ('schedule_file', 'ClassSchedule-23_comma.csv')]:
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], file_name[1])
        saved_files[file_name[0]] = file_path

    intervals_dec, buildings, building_codes = \
        analyze(saved_files['buildings_file'],
                saved_files['central_file'],
                saved_files['schedule_file'])
    dict_buildings = {code: building.asdict() for code, building in buildings.items()}
    return render_template('filter.html',
                           intervals_dec=intervals_dec,
                           buildings=dict_buildings,
                           building_codes=building_codes)
def main(): #print "Pandas Version", pd.__version__ symbol_file = sys.argv[1] startdate = sys.argv[2] enddate = sys.argv[3] starting_equity = sys.argv[4] benchmark = sys.argv[5] ''' benchmark = sys.argv[5] entry_strategy = sys.argv[6] exit_strategy = sys.argv[7] entry_filter = sys.argv[8] exit_filter = sys.argv[9] pos_size_strategy = sys.argv[10] ''' # Get Market data from Yahoo files #d_data, ls_symbols = marketdata.get_data(startdate, enddate,symbol_file,benchmark) #df_prices = d_data['close'] # Get Market data from SQLite database (previously loaded from Yahoo df_prices, ls_symbols = marketdata.get_sqlitedb_data( startdate, enddate, symbol_file, benchmark, 'Close') #df_sma = indicators.sma(df_prices,50) #df_uch, df_lch = indicators.channel(df_prices,50) #analysis.plot(df_uch.index,df_prices['AAPL'],df_uch['AAPL'],df_lch['AAPL'],df_sma['AAPL']) # Find Events and create Event profile df_bb_events = events.find_bb_events(ls_symbols, df_prices, benchmark) #Generate an Event Profile simulator.marketsim(100000, 'mydata.csv', 'portval.csv', df_prices) #Analyze the simulation Results analysis.analyze('portval.csv')
def scrape_job(id, link, json_skills):
    # Pandas dataframes can't be passed directly into celery tasks
    skills = pd.read_json(json_skills)

    j = get_job(link)
    j['JobId'] = id
    j['link'] = link

    # jobs_table.put_item(Item=j)
    jobs_table_queue.append(j)

    # print("Passing job to analyze")
    d = analyze(j, skills, analysis_table)

    # if len(d.keys()) == len(d.values()):
    #     analysis_df.loc[id] = d
    return d
def make_analyze():
    try:
        # Load the data
        data = request.get_json()
    except Exception as e:
        raise e

    if data == {}:
        return (bad_request())
    else:
        # Get the text and the language
        try:
            lang = data['lang']
        except:
            try:
                lang = detect_language(data['text'])
                print(lang)
            except:
                responses = jsonify("Error in vectorize: language field is missing")
                return responses
        try:
            text = data['text']  # we assume text is tokenized
        except:
            responses = jsonify("Error in analyze: text is missing")
            return responses

        if lang not in ['en', 'es', 'ar', 'ro', 'fr']:
            responses = jsonify(
                message="Language not available. Language must be in ['en','es','ar','ro','fr']"
            )
            return responses

        filename = os.path.join(os.path.dirname(__file__), 'models-registry.json')
        registry = load_data(filename)
        analysis = analyze(text, lang, registry)
        # print(analysis[0])

        # Send the response codes
        responses = jsonify(concepts=analysis[0],
                            key_ideas=analysis[1],
                            topics=analysis[2])
        responses.status_code = 200
        return responses
def save_and_segment(doc_id, html, url):
    import suggestor
    title, text, words = analyze(html)
    l = len(words)
    r.hmset('doc:%s' % doc_id, {'title': title, 'text': text, 'len': l, 'url': url})
    r.incrbyfloat('total_len', l)
    for token in words:
        suggestor.add_query(token.word, token.weight)
        r.zadd(u'word:%s' % token.word, token.weight, doc_id)
        r.hmset(u'dw:%s:%s:%s' % (doc_id, token.word, token.fieldname), {
            'pos': token.pos,
            'len': token.len,
            'weight': token.weight,
        })
def run_RQ_simulation(ge_args, rq_args, trace_dir, csv_path):
    """ Runs simulation of using RaptorQ """
    # generate stream of frames to use with 1080p 60fps for 5 min
    print("Running simulation with RaptorQ...")
    recv_ge = network_model.GEModel(ge_args)
    box = network_model.NetworkBox(recv_ge, rq_args.latency,
                                   model_constants.RATE_5Mbps)
    rq = raptorq_model.RaptorQ(rq_args, box)
    stream_gen = streams.FixedSizeStream(model_constants.RES_1080P,
                                         model_constants.FPS_60)
    trace_path = "cbr2500.txt"
    # for trace_path in os.listdir(trace_dir):
    #     if ".swp" in trace_path:
    #         continue
    frames = stream_gen.from_trace(
        os.path.abspath(trace_dir + "/" + trace_path))

    # initialize new GE models
    rq.box.recv_ge_model = network_model.GEModel(ge_args)

    # use RaptorQ
    print("Using {} frames with size {}...".format(len(frames), frames[0].size))
    source_blks, total_symbols = rq.form_source_blocks(frames)
    print("Using {} source blocks...".format(len(source_blks)))
    metrics = []
    for blk in source_blks:
        metrics.append(rq.transmit_source_block(blk, model_constants.RQ_RATE))

    # average metrics
    analysis.analyze(metrics, total_symbols, "RQ", rq, csv_path)
def analysis():
    if request.method == 'POST':
        topic = ast.literal_eval(request.form['topic'])
        sources_ids = ast.literal_eval(request.form['sources'])
        source_format_string = formatstring(sources_ids)
        page = ast.literal_eval(request.form['page'])
        fromDate = format_fromDate(ast.literal_eval(request.form['fromDate']))
        toDate = format_toDate(ast.literal_eval(request.form['toDate']))

        app.logger.info('POST analysis: topic(%s), sources_ids(%s)'
                        % ('%s', source_format_string) % tuple([topic] + sources_ids))

        analyze(topic, sources_ids)

        db = g.db
        cur = db.cursor()
        rule_count_dic = get_rule_count_dic(cur, topic, sources_ids, fromDate, toDate)
        from_post_rnum = (page - 1) * config['perpage']
        post_ruleset_count_dic = get_post_ruleset_count_dic(
            cur, topic, sources_ids, from_post_rnum, config['perpage'], fromDate, toDate)
        return jsonify(rule_count_dic=rule_count_dic,
                       post_ruleset_count_dic=post_ruleset_count_dic)
def main(): # string = "((abc)|(dξc))*|ani" # string = trees.pre(string) # tree = trees.evaluate(string) # trees.print2D(tree) # dfa = directo.directo(tree, string) # #graph.graph(dfa, "prueba") # #graph.to_txt(dfa, "prueba") # print(evaluate.is_in_language(dfa, "ab")) # print("Ingrese archivo ") # archivo = input() # archivo = open("./inputs/"+archivo) archivo = open("./inputs/DoubleAritmetica.ATG") data = archivo.read() archivo.close() name, characters, keywords, tokens, productions = decomp.main(data) dfa, dfas, parser = analysis.analyze(name, characters,keywords,tokens,productions) to_file.create(dfa, dfas, parser, name)
def save_and_segment(doc_id, html, url):
    import suggestor
    title, text, words = analyze(html)
    l = len(words)
    r.hmset('doc:%s' % doc_id, {
        'title': title,
        'text': text,
        'len': l,
        'url': url
    })
    r.incrbyfloat('total_len', l)
    for token in words:
        suggestor.add_query(token.word, token.weight)
        r.zadd(u'word:%s' % token.word, token.weight, doc_id)
        r.hmset(u'dw:%s:%s:%s' % (doc_id, token.word, token.fieldname), {
            'pos': token.pos,
            'len': token.len,
            'weight': token.weight,
        })
def respond_analysis(bot, update):
    """
    Listens to each message posted in the chat; if its sentiment is above or
    below a given threshold, the bot sends a message directed at the user who
    sent the positive or negative message. Also stores each message it
    receives in the groups hashmap: it keeps a map for each group chat that
    contains each user and their 10 most recent messages.
    """
    global client, groups
    message = update.message
    text = message.text
    print(text)
    score = analysis.analyze(text, client).score
    if (score >= .5):
        score = "%.4f" % score
        bot_msg = "Whoa {} you are looking pretty happy there with a sentiment score of: {}" \
            .format(message.from_user.first_name, str(score))
        bot.send_message(chat_id=message.chat_id, text=bot_msg)
    elif (score <= -.5):
        score = "%.3f" % score
        bot_msg = "You gotta calm down {}, you're super mad right now with a sentiment score of : {}" \
            .format(message.from_user.first_name, str(score))
        bot.send_message(chat_id=message.chat_id, text=bot_msg)
    elif (score == 0.0):
        bot_msg = "I either can't analyze your message or you are extremely neutral {}" \
            .format(message.from_user.first_name)
        bot.send_message(chat_id=message.chat_id, text=bot_msg)

    chat_id = message.chat_id
    user_id = message.from_user.id
    if chat_id in groups.keys():
        cur_group = groups[chat_id]
    else:
        groups[chat_id] = {}
        cur_group = groups[chat_id]
    if user_id in cur_group.keys():
        messages = cur_group[user_id]
    else:
        cur_group[user_id] = collections.deque(maxlen=10)
        messages = cur_group[user_id]
    messages.append(text)
def run(fitter, fitterParams, fitterCoeff, dataClass, label, valid=0.05,
        train="train.csv", test="test.csv", profile=False, nTrials=1,
        force=True, forceFeat=True, plot=False):
    trainFile = train
    testFile = test
    inDir, cacheDir, outDir = getDirsFromCmdLine()
    # add the label for this run (ie: SVM/Boost/LogisticRegression)
    outDir = pGenUtil.ensureDirExists(outDir + label + "/")
    # get the directories we want
    predictDir = pGenUtil.ensureDirExists(outDir + "predictions")
    if (profile and plot):
        profileDir = pGenUtil.ensureDirExists(outDir + "profile")
    else:
        profileDir = None
    # get the data object, by cache or otherwise
    dataObj = \
        pCheckUtil.pipeline([[cacheDir + 'data.pkl', getData, dataClass, outDir,
                              inDir + trainFile, valid, False, profileDir]],
                            forceFeat)
    return analyze(dataObj, inDir, outDir, testFile, fitter, fitterParams,
                   fitterCoeff, label, dataClass, nTrials, force, plot)
sys.path.insert(0, './bayesian')
from bayesian import bayesian
sys.path.insert(0, './SVR')
from svr import svr

start = time.time()
ridge(sys.argv[1], sys.argv[2])
ridge_end = time.time()
lasso(sys.argv[1], sys.argv[2])
lasso_end = time.time()
bayesian(sys.argv[1], sys.argv[2])
bayesian_end = time.time()
svr(sys.argv[1], sys.argv[2])
svr_end = time.time()

print "Ridge Results"
print "Running Time: " + str(ridge_end - start) + " seconds"
analyze("ridge_out.csv")
print "---"
print "Lasso Results"
print "Running Time: " + str(lasso_end - ridge_end) + " seconds"
analyze("lasso_out.csv")
print "---"
print "Bayesian Results"
print "Running Time: " + str(bayesian_end - lasso_end) + " seconds"
analyze("bayesian_out.csv")
print "---"
print "SVR Results"
print "Running Time: " + str(svr_end - bayesian_end) + " seconds"
analyze("svr_out.csv")
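The script above times each model by capturing time.time() before and after the call and printing the delta. The same pattern can be factored into a small helper; this is only an illustrative sketch, not part of the original script, and the lambda "models" below are placeholders for the real ridge/lasso/bayesian/svr calls.

import time

def timed(label, fn, *args):
    # Run fn(*args), report how long it took, and return its result.
    start = time.time()
    result = fn(*args)
    print("%s running time: %.2f seconds" % (label, time.time() - start))
    return result

# Stand-in workloads; the real script would pass ridge, lasso, bayesian, svr.
models = {
    "ridge": lambda: sum(i * i for i in range(10 ** 6)),
    "lasso": lambda: sorted(range(10 ** 6), reverse=True),
}
for name, fn in models.items():
    timed(name, fn)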
def test0(number):
    data = tools.loaddir("/home/aditya/Desktop/project/aclImdb/train/pos/")
    data.extend(tools.loaddir("/home/aditya/Desktop/project/aclImdb/train/neg/"))
    print "analyze: " + str(analysis.analyze(open(data[number][0]).read(), debug=True))
    print "analyze: " + data[number][1]
# print(tagged_sents)
print(len(sents))

parsers = [nltk.RegexpParser(grammar1),
           nltk.RegexpParser(grammar2),
           nltk.RegexpParser(grammar3)]

# Dict to hold all found facts
results = {"FACT1": [], "FACT2": [], "FACT3": [],
           "FACT4": [], "FACT5": [], "FACT6": []}

# For all sentences
for sent in sents:
    # Tag with POS-tags
    tagged_sent = nltk.pos_tag(sent)
    for parser in parsers:
        # Run each parser
        parsed_sent = parser.parse(tagged_sent)
        results2 = analyze(parsed_sent)
        for result in results2["FACT1"]:
            results["FACT1"].append(result)
        for result in results2["FACT2"]:
            results["FACT2"].append(result)
        for result in results2["FACT3"]:
            results["FACT3"].append(result)
        for result in results2["FACT4"]:
            results["FACT4"].append(result)
        for result in results2["FACT5"]:
            results["FACT5"].append(result)
        for result in results2["FACT6"]:
            results["FACT6"].append(result)

# Find contradictions for FACT1
for result in results["FACT1"]:
set.append(curr)
bucket.remove(curr)
all_sets.append(set)

for i in range(07):
    test_set = all_sets[i]
    train_set = []
    for set in [all_sets[z] for z in range(07) if z != i]:
        train_set.extend(set)
    for key in train_set:
        writeToFile(featurefile, data[key]['features'] + '\n', 'a')
    trainSvm(featurefile, model, gamma=1)
    predictSvm(featurefile, model, outfile)
    outstring = "Training Fold : " + str(i)
    print "************* " + outstring + " *************"
    analyze(featurefile, outfile, resfile, outstring)
    deleteFiles([featurefile, outfile])
    for key in test_set:
        writeToFile(featurefile, data[key]['features'] + '\n', 'a')
    predictSvm(featurefile, model, outfile)
    outstring = "Testing Fold : " + str(i)
    pre, rec = analyze(featurefile, outfile, resfile, outstring)
    precision.append(pre)
    recall.append(rec)

print precision
print sum(precision) / float(len(precision))
print recall
print sum(recall) / float(len(recall))
def tokenize(self, text):
    return analysis.analyze(text)
def get_results(testcases):
    for i, testcase in enumerate(testcases):
        text, expected = testcase
        result = analysis.analyze(text)
        yield (i + 1, expected, result['classification'], result['polarity'],
               result['emotions'], text)
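One way the (case number, expected label, classification, polarity, emotions, text) tuples yielded above might be consumed is to tally how often the expected label matches the predicted classification. The tuples in this sketch are hypothetical sample data; in the real code they would come from get_results(testcases).

# Hypothetical results, shaped like the tuples the generator above yields.
results = [
    (1, 'positive', 'positive', 0.8, ['joy'], 'great product'),
    (2, 'negative', 'neutral', 0.1, [], 'it was okay'),
]
correct = sum(1 for case_no, expected, classification, polarity, emotions, text in results
              if expected == classification)
print("accuracy: %d/%d" % (correct, len(results)))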
def analyze(self, args):
    from pprint import pprint
    for date in args.date:
        path = self.get_path(date)
        pprint(analysis.analyze(path))