def ngrams(toksentence,n=1,njump=0): n_gram=n # var berisi jumlah n pada N-Gram, default 1(Uni-Gram) n_gram_words=[] kata = toksentence # var dengan type list berisi tokenize sentence try: if type(toksentence) != list: raise NGramErr({"message":"Harap masukkan tokenized sentence!!", "tokenize":type(toksentence)}) else: if len(toksentence) == 0: raise NGramErr({"message":"Panjang tokenized sentence tidak boleh 0!!", "tokenize":len(toksentence)}) if n_gram > len(kata): print "Len N-Gram: %i => Len Sentence: %i"%( n_gram, len(kata)) raise NGramErr({"message":"Total N-Gram tidak boleh melebihi Total Kata", \ "tokenize":len(toksentence)}) except NGramErr as e: print e.args[0]["message"] return except: print "Unexpected other error:", sys.exc_info()[0] return else: try: ################# Mulai proses mengelompokkan kata kedalam N-Gram ################ for z in range(len(kata)): ngram=[] if z+n_gram-1 < len(kata): # Jangan sampai loop melebihi total len(kata) for x in range(n_gram): if x > 0: # Untuk mendapat model yang lebih bervariasi, # kita tambahkan njump parameter if z+n_gram-1+njump< len(kata): if kata[z+x+njump]=='<s>': ngram.append(kata[z+x+njump+1]) else: ngram.append(kata[z+x+njump]) else: """ Dirasa masih dibutuhkan modifikasi lagi untuk memaksimalkan sample ketika penerapan jump parameter digunakan """ if kata[0]=='<s>': ngram.append(kata[z+x]) else: ngram.append(kata[z+x]) else: ngram.append(kata[z+x]) # Ada beberapa kasus ketika kita menerapkan jump parameter (biasanya jika jump param cukup besar) # terdapat sample yang tidak masuk ke karakteristik populasi N-Gram yang kita inginkan # karena itu kita harus menyaring ulang seluruh sample populasi #if len(ngram) == n_gram and ngram not in n_gram_words: n_gram_words.append(ngram) ################ Akhir proses mengelompokkan kata kedalam N-Gram ################# except: print "Unexpected other error:", sys.exc_info()[0] return return n_gram_words
def spcall(qry, param, commit=False):
    """Run stored procedure `qry` with `param` and return its rows.

    On any failure, returns a one-element list holding a single tuple whose
    text describes the exception, instead of raising.
    """
    try:
        connection = DBconn()
        cur = connection.getcursor()
        cur.callproc(qry, param)
        rows = cur.fetchall()
        if commit:
            connection.dbcommit()
        return rows
    except:
        # Preserve original contract: swallow everything, report as data.
        exc_type, exc_value = sys.exc_info()[0], sys.exc_info()[1]
        return [("Error: " + str(exc_type) + " " + str(exc_value),)]
def spcall(qry, param, commit=False):
    """Invoke stored procedure `qry`, optionally committing afterwards.

    Returns the fetched rows; any exception is converted into a
    ``[("Error: <type> <value>",)]`` result rather than propagated.
    """
    try:
        db_handle = DBconn()
        db_cursor = db_handle.getcursor()
        db_cursor.callproc(qry, param)
        result_rows = db_cursor.fetchall()
        if commit:
            db_handle.dbcommit()
    except:
        err_type, err_value = sys.exc_info()[:2]
        result_rows = [("Error: %s %s" % (err_type, err_value), )]
    return result_rows
def get_otp_for_new_user(self, http_request):
    """
    Generates OTP for new User and triggers a message routine to send SMS
    to the User's phone number.

    :param http_request: Incoming Request
    :return: bottle.HTTPResponse
    """
    try:
        # generate otp and auth code
        str_code, auth_code = DbHelper().otpObj.generate_otp(
            http_request.user.handset_serial_number,
            http_request.user.phone_number, http_request.timestamp)
        # send otp message to the client
        send_otp_message(str_code, http_request.user.phone_number)
        return new_user_response(http_request)
    except Exception as e:
        # FIX: traceback.print_exception() returns None, so the original
        # print(traceback.print_exception(...)) emitted a spurious "None"
        # line — call it directly instead.
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback,
                                  limit=5, file=sys.stdout)
        # FIX: e.__traceback__.__str__() printed "<traceback object at ...>",
        # which is useless for diagnosis; show the exception itself.
        print("ERROR: " + repr(e))
        return failure_response(http_request, Response.OTP_ERROR)
def loc(self, ip, ak=AK):
    """Resolve `ip` to location data via the remote geolocation API.

    Returns the dict produced by ``self.format``, or None when rate-limited
    or when the HTTP request does not return 200.
    """
    # FIX: the original fell through the rate-limit and non-200 branches and
    # then referenced `data`/`html` before assignment (NameError), and its
    # error log concatenated a tuple with a string (TypeError).
    if self.limiter.remainCount() <= 0:
        logger.info(
            "self.limiter.remainCount() = %d, self.limiter.speed() = %.8f, Will sleep 10sec."
            % (self.limiter.remainCount(), self.limiter.speed()))
        time.sleep(10)
        return None
    url = self.url_format % {"ip": ip, "ak": ak}
    r = requests.get(url, timeout=1)
    if r.status_code != 200:
        logging.warning("Request %s, return code: %d" % (url, r.status_code))
        return None
    html = r.text
    data = json.loads(html)
    logging.info("Request %s, return code: %d" % (url, r.status_code))
    self.limiter.inc()
    time.sleep(0.1)  # small pause between successive API calls
    logger.info(
        "self.limiter.remainCount() = %d, self.limiter.speed() = %.8f, getURL Total: %d."
        % (self.limiter.remainCount(), self.limiter.speed(),
           self.limiter.getTotalInc()))
    try:
        data = self.format(data)
    except:
        # FIX: was `sys.exc_info() + "; " + html` — tuple + str raised
        # TypeError inside the handler; format the message instead.
        logger.error("%s; %s" % (sys.exc_info(), html))
    return data
def WordTokenize(self, sentence, stopword=None, removepunct=False):
    # Tokenize `sentence` into words (Python 2 only: string.maketrans and
    # print statements).
    #
    # sentence:    raw sentence string
    # stopword:    optional list of words to drop; must be a list or the
    #              call prints an error and returns None
    # removepunct: when True, strip punctuation from each token first
    #
    # Returns the word sequence (a `filter` result — a list under Python 2),
    # or None when stopword validation fails.
    # Split the sentence into separate words on whitespace
    words = re.split(r'\s',sentence)
    if removepunct:
        # Build an identity translation table for use with string.translate
        table = string.maketrans("","")
        # translate() returns a copy of the string with characters mapped
        # through `table` and every character in string.punctuation deleted
        words = [z.translate(table,string.punctuation).strip() for z in words]
    # Drop all empty strings left over after splitting/stripping
    words = filter(lambda x: x!='', words)
    # Remove words that appear in the stopword list
    if stopword!=None:
        try:
            if type(stopword)!=list:
                raise StopWordMustList({"message":"Tipe stopword harus list", "stopword":type(stopword)})
            words = filter(lambda x: x not in stopword, words)
        except StopWordMustList as e:
            print e.args[0]["message"], " ,Get stopword type:", e.args[0]["stopword"]
            return
        except:
            print "Unexpected other error:", sys.exc_info()[0]
            return
    return words
def PrintException():
    # Print a one-line report for the exception currently being handled,
    # in the form: EXCEPTION IN (<file>, LINE <n> "<source>"): <exception>.
    # Must be called from inside an `except` block (Python 2: print statement).
    exc_type, exc_obj, tb = sys.exc_info()
    f = tb.tb_frame
    lineno = tb.tb_lineno
    filename = f.f_code.co_filename
    # Refresh linecache in case the source file changed on disk
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, f.f_globals)
    print '\n' 'EXCEPTION IN ({}, LINE {} "{}"): {}'.format(
        filename, lineno, line.strip(), exc_obj)
def start(self):
    """Geolocate every IP yielded by the loader and persist the results.

    Lookup failures are logged and skipped so one bad address cannot stop
    the whole run.
    """
    for ip in self.loader.iter():
        try:
            location = self.ip_to_loc.loc(ip)
        except:
            # Best-effort: record the failure and move on to the next IP.
            logger.error(sys.exc_info())
            continue
        self.ip_storage.store(ip, **location)
def binarization_image(filepath, save_backup=False, invert_image=True, threshold=127):
    """Sharpen and binarize the image at `filepath`, optionally inverting it first.

    Tesseract recognizes dark-on-light text much better than light-on-dark;
    Arknights screenshots are mostly light-on-dark, hence the inversion.

    :param filepath: path of the image to binarize (overwritten in place)
    :param save_backup: keep a ".DebugBackup.png" copy of the original
    :param invert_image: invert colors before binarizing; most callers want
        this, but a few screens do not
    :param threshold: gray cutoff; history: 200 → 175 (issue #24) → PIL's
        default 127
    :return: the binarized PIL image (also written back to `filepath`)
    """
    if save_backup:
        # Keep a pre-binarization copy so anomalies can be diagnosed later.
        try:
            copy(filepath, filepath + ".DebugBackup.png")
        except IOError as e:
            print("Unable to copy file. {}".format(e))
        except:
            print("Unexpected error:", sys.exc_info())
    picture = Image.open(filepath)
    # Sharpen, then convert to grayscale.
    sharpened = picture.filter(ImageFilter.SHARPEN)
    grayscale = sharpened.convert('L')
    if invert_image:
        grayscale = ImageOps.invert(grayscale)
    # FIX(idiom): build the 256-entry lookup table with a comprehension
    # instead of the manual append loop: 0 below threshold, 1 at or above.
    # (convert('1') would also work, but an explicit table keeps the
    # threshold controllable.)
    table = [0 if gray < threshold else 1 for gray in range(256)]
    bim = grayscale.point(table, '1')
    bim.save(filepath)
    return bim
def collect_tweets(place, query):
    """Collect up to 50 English tweets matching `query` near `place`.

    Appends one "<created_at>,<cleaned_text>,<polarity>" row per tweet to
    dat2.csv and a "<query>,<place>,<lat>,<lng>,<avg polarity>" summary row
    to dat3.csv.

    :return: number of tweets processed (0 if anything failed early)
    """
    cnt = 0
    try:
        print(
            "================================start========================================="
        )
        for i in range(1):
            avg = 0
            g = geocoder.google(place)
            # "lat,lng,radius" string understood by the Twitter search API
            g_string = str(g.lat) + "," + str(g.lng) + "," + "100mi"
            # FIX: dat2.csv was opened but never closed (resource leak);
            # `with` guarantees the handle is flushed and closed.
            with open('dat2.csv', 'a') as saveFile:
                for tweet in tweepy.Cursor(api.search,
                                           q=query,
                                           lang='en',
                                           geocode=g_string).items(50):
                    cnt = cnt + 1
                    analysis = TextBlob(tweet.text)
                    avg = avg + analysis.sentiment.polarity
                    saveFile.write(str(tweet.created_at) + ",")
                    # Strip mentions, URLs and non-alphanumerics before saving.
                    saveThis = ' '.join(
                        re.sub(
                            r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|(u2026)|(https)",
                            " ", tweet.text).split())
                    saveFile.write(saveThis + "," +
                                   str(analysis.sentiment.polarity))
                    saveFile.write('\n')
                    print(analysis.sentiment.polarity)
            print(
                "\n===========================end========================================="
            )
            # NOTE: average divides by the requested 50 items, not by the
            # count actually returned — kept to preserve the output format.
            with open('dat3.csv', 'a') as saveFile1:
                saveFile1.write(query + "," + place + "," + str(g.lat) + "," +
                                str(g.lng) + "," + str(avg / 50))
                saveFile1.write('\n')
            print(cnt)
    except:
        print("Oops!", sys.exc_info()[0], "occured.")
    return cnt
def delete_venue(venue_id):
    """Delete the Venue identified by `venue_id`.

    Responds with ``{"success": true}`` when the delete commits, and
    ``{"success": false}`` when the lookup, delete or commit fails (the
    session is rolled back in that case). The session is always closed.
    """
    response = {}
    try:
        response['success'] = True
        record = Venue.query.get(venue_id)
        db.session.delete(record)
        db.session.commit()
    except:
        # Lookup or commit failed: undo the transaction and report failure.
        db.session.rollback()
        print(sys.exc_info())
        response['success'] = False
    finally:
        db.session.close()
    return jsonify(response)
def collect_tweets(place,query):
    """Collect up to 50 English tweets matching `query` near `place` (50mi).

    Appends one "<created_at>,<cleaned_text>,<polarity>" row per tweet to
    dat2.csv and a "<query>,<place>,<lat>,<lng>,<avg polarity>" summary row
    to dat3.csv.

    :return: number of tweets processed (0 if anything failed early)
    """
    cnt=0
    try:
        print("================================start=========================================")
        for i in range(1):
            avg=0
            g = geocoder.google(place)
            # "lat,lng,radius" string understood by the Twitter search API
            g_string=str(g.lat)+","+str(g.lng)+","+"50mi"
            print("rer")
            # FIX: dat2.csv was opened but never closed (resource leak);
            # `with` guarantees the handle is flushed and closed.
            with open('dat2.csv','a') as saveFile:
                print("wer")
                for tweet in tweepy.Cursor(api.search,q=query,lang='en',geocode=g_string).items(50):
                    cnt=cnt+1
                    analysis = TextBlob(tweet.text)
                    avg=avg + analysis.sentiment.polarity
                    saveFile.write(str(tweet.created_at) + ",")
                    # Strip mentions, URLs and non-alphanumerics before saving.
                    saveThis = ' '.join(re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|(u2026)|(https)", " ", tweet.text).split())
                    saveFile.write(saveThis + "," + str(analysis.sentiment.polarity))
                    saveFile.write('\n')
                    print(analysis.sentiment.polarity)
            print("\n===========================end=========================================")
            # NOTE: average divides by the requested 50 items, not by the
            # count actually returned — kept to preserve the output format.
            with open('dat3.csv','a') as saveFile1:
                saveFile1.write(query + "," + place + "," + str(g.lat) + "," + str(g.lng) + "," + str(avg/50))
                saveFile1.write('\n')
            print(cnt)
    except:
        print("Oops!",sys.exc_info()[0],"occured.")
    return cnt
def run(self):
    # Main worker loop for one "kapellmeister" process: claim a process-wide
    # aspect lock, then repeatedly decide and perform an action once per
    # minute until the state machine says stop.
    # Register this worker; bail out if another instance already holds it.
    process_tracked = self.process_director.start_aspect(HE_ASPECT(self.human_name))
    if not process_tracked:
        log.info("Can not start kappelmeister [%s] because another one is work :(" % self.human_name)
        return
    log.info("kapelmiester [%s] starts..." % self.human_name)
    self.last_token_refresh_time = time_hash(datetime.now())
    while 1:
        log.info("[%s] %s will do next step..." % (self.human_name, self.pid))
        try:
            step = now_hash()  # timestamp marking the start of this step
            # Leave the loop as soon as the shared state is no longer "work".
            if not self.check_state(S_WORK):
                log.info("state is suspend. I will stop. My pid is: %s" % self.pid)
                break
            self.check_token_refresh(step)
            # Ask the policy what to do this step (and whether to force it).
            action, force = self.decide(step)
            log.info("[%s] decide: %s" % (self.human_name, action))
            if action != A_SLEEP:
                action_result = self.do_action(action, force)
            else:
                # Nothing to do: note the sleep state and idle for a minute.
                self.check_state(S_SLEEP)
                action_result = A_SLEEP
                time.sleep(MINUTE)
            log.info("[%s] step is end. Action: [%s] => %s; time spent: %s;" % (
                self.human_name, action, action_result, now_hash() - step,
            ))
        except Exception as e:
            # Any failure is recorded and the loop continues after a pause,
            # so one bad step never kills the worker.
            log.error("ERROR AT HE! ")
            _, _, tb = sys.exc_info()
            log.exception(e)
            self.db.store_error(self.human_name, e, " ".join(traceback.format_tb(tb)))
            time.sleep(10)
def create_show_submission():
    """Handle the new-show form POST.

    Validates the form, inserts a Shows record linking the chosen venue and
    artist at the given start time, flashes the outcome, and redirects home.
    """
    form = ShowForm(request.form)
    if form.validate():
        outcome = 'No_error'
        try:
            venue_id = request.form['venue_id']
            artist_id = request.form['artist_id']
            start_time = request.form['start_time']
            venue = Venue.query.get_or_404(venue_id)
            artist = Artist.query.get_or_404(artist_id)
            start_time = datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')
            # Insert the new show row.
            db.session.add(Shows(artist=artist, venue=venue,
                                 start_time=start_time))
            db.session.commit()
        except:
            outcome = 'Error_db_add'
            db.session.rollback()
            print(sys.exc_info())
        finally:
            db.session.close()
    else:
        outcome = 'Error_form'
    # Flash a message matching the outcome.
    if outcome == 'No_error':
        flash('The Show was successfully added!', 'success')
    elif outcome == 'Error_db_add':
        flash('An error occurred. The Show could not be added.', 'danger')
    elif outcome == 'Error_form':
        flash_errors(form)
    return redirect(url_for('index'))
def WordTokenize(self, sentence, stopword=None, removepunct=False):
    # Split `sentence` into a word list (Python 2 only: string.maketrans
    # and print statements).
    #
    # sentence:    raw sentence string
    # stopword:    optional stopword list; a non-list value prints an error
    #              and makes the method return None
    # removepunct: strip punctuation from each token when True
    #
    # Returns the filtered word sequence (a list under Python 2) or None.
    # Split the sentence into separate words on whitespace
    words = re.split(r'\s', sentence)
    if removepunct:
        # Identity translation table for string.translate
        table = string.maketrans("", "")
        # translate() maps characters through `table` and deletes every
        # character listed in string.punctuation
        words = [
            z.translate(table, string.punctuation).strip() for z in words
        ]
    # Drop empty strings produced by splitting/stripping
    words = filter(lambda x: x != '', words)
    # Remove any word present in the stopword list
    if stopword != None:
        try:
            if type(stopword) != list:
                raise StopWordMustList({
                    "message": "Tipe stopword harus list",
                    "stopword": type(stopword)
                })
            words = filter(lambda x: x not in stopword, words)
        except StopWordMustList as e:
            print e.args[0]["message"], " ,Get stopword type:", e.args[0][
                "stopword"]
            return
        except:
            print "Unexpected other error:", sys.exc_info()[0]
            return
    return words
def run(self):
    # Bingo caller loop (Python 2 only: `range()` returns a list here, and
    # print statements are used). Draws up to 99 numbers at random, one per
    # tick, until a Full House is claimed or the pool runs out.
    number_pool = range(1,100)  # candidate numbers 1..99 (py2 list)
    for i in range(0,100):
        time.sleep(self._delay)  # pacing between draws
        count_left = len(number_pool)
        # Slot 4 of the claim table is the Full House flag; any non-zero
        # value ends the game immediately.
        if (self._claim_status[4] != 0):
            print "Full House claimed. Game Over."
            self._game_over_status = True
            break
        if (count_left):
            try:
                # Draw and announce a random remaining number.
                next_num = number_pool.pop(randint(0,count_left-1))
                sys.stdout.write((str(next_num) + ' '))
            except:
                print "Popping empty list", sys.exc_info()[0]
        else:
            # Pool exhausted: flag game over (loop still runs out its
            # remaining iterations rather than breaking).
            print("\nNumber pool finished.")
            self._game_over_status = True
def main():
    """Install the SIGINT handler, boot Unreal, load the first experiment's
    map (or restart), then idle forever printing a heartbeat."""
    signal.signal(signal.SIGINT, signal_handler)
    host_base_dir = get_host_base()
    try:
        experiment_setting_list = data_clct_conf_obj.get_config_data()["experiment_setting_list"]
        time.sleep(3)  # required pause between restart and change_level
        start_unreal(host_setting, host_base_dir)
        time.sleep(7)  # required pause between restart and change_level
        experiment_setting = experiment_setting_list[0]
        if "map_name" in experiment_setting:
            change_level(experiment_setting["map_name"])
        else:
            restart_unreal()
    except Exception:
        # FIX: the original had a dead `pass` before the print and printed
        # the raw list returned by format_exception; print_exc() renders
        # the traceback properly.
        traceback.print_exc()
    while 1:
        print("hello")
        time.sleep(3)  # heartbeat interval
def data(self, datatype, params, interval=0):
    # Run a retention-style query for every day in [startDay, endDay], one
    # worker thread per day, then pad and sort the collected rows.
    #
    # datatype: passed through to create_query_sql / submit
    # params:   dict with "startDay", "endDay" ("%Y-%m-%d"), "remain"
    #           (window length in days), "events", optional "attrs"
    # interval: seconds to sleep between launching worker threads
    #
    # Returns the sorted result list, or {"errinfo": ...} on bad input or
    # query failure. (Error strings are user-facing Chinese messages and
    # are left untouched.)
    result = []
    try:
        # ---- parameter parsing and range validation ----
        start_day = datetime.datetime.strptime(params["startDay"], "%Y-%m-%d")
        end_day = datetime.datetime.strptime(params["endDay"], "%Y-%m-%d")
        remain_num = int(params["remain"])
        events = params["events"]
        num = (end_day - start_day).days
        attrs = params.get("attrs", {})
        # At most 60 days of span and a 1..60-day window are allowed.
        if num > 60 or num < 0:
            return {"errinfo": "日期跨度超出范围!"}
        if remain_num > 60 or remain_num <= 0:
            return {"errinfo": "窗口期超出范围!"}
    except:
        import sys
        import traceback
        exc_type, exc_value, exc_traceback = sys.exc_info()
        errinfo = traceback.format_exception(exc_type, exc_value,
                                             exc_traceback)
        logging.error(json.dumps(errinfo))
        return {"errinfo": "参数错误!"}
    try:
        # ---- fan out one query thread per day ----
        threads = []
        while start_day <= end_day:
            # NOTE(review): this try/except looks like a Python 2/3 shim —
            # .decode("utf-8") fails on py3 str, triggering the fallback.
            try:
                query = str(
                    self.create_query_sql(
                        datatype, start_day.strftime("%Y-%m-%d"),
                        (start_day + datetime.timedelta(days=remain_num)
                         ).strftime("%Y-%m-%d"),
                        events,
                        attrs=attrs).decode("utf-8"))
            except:
                query = self.create_query_sql(
                    datatype, start_day.strftime("%Y-%m-%d"),
                    (start_day + datetime.timedelta(days=remain_num)
                     ).strftime("%Y-%m-%d"),
                    events,
                    attrs=attrs).decode("utf-8").encode("utf-8")
            # self.submit appends its rows into the shared `result` list.
            t = threading.Thread(target=self.submit,
                                 args=(datatype, query, result,
                                       start_day.strftime("%Y-%m-%d")))
            t.start()
            threads.append(t)
            time.sleep(interval)
            start_day += datetime.timedelta(days=1)
        for _thread in threads:
            _thread.join()
    except:
        import sys
        import traceback
        exc_type, exc_value, exc_traceback = sys.exc_info()
        errinfo = traceback.format_exception(exc_type, exc_value,
                                             exc_traceback)
        logging.error(json.dumps(errinfo))
        return {"errinfo": "查询错误!"}
    # ---- pad "missing" data points ----
    for item in result:
        item["length"] = remain_num + 1
        # Right-pad "numbers" with "" up to the full window length.
        item["numbers"] = (
            item["numbers"] if "numbers" in item else []) + ([""] * (
                remain_num + 1 -
                len(item["numbers"] if "numbers" in item else [])))
        day = datetime.datetime.strptime(item["tm"], "%Y-%m-%d")
        yesterday = datetime.datetime.today() - datetime.timedelta(days=1)
        import copy
        # Positions whose date has already passed get 0 instead of "".
        remain_form = [''] * ((yesterday - day).days + 1)
        remain_result = copy.deepcopy(item["numbers"])
        for index, (value, form) in enumerate(zip(remain_result,
                                                  remain_form)):
            if value == "":
                item["numbers"][index] = 0
    # Sort ascending by date string.
    result_sorted = sorted(result, key=lambda item: item["tm"])
    return result_sorted
def main():
    # Experiment driver: boot Unreal, then for every experiment setting run
    # `number_of_runs` data-collection passes over SSH, appending escaped
    # JSON fragments to the remote stats file as it goes.
    # NOTE(review): `companion_setting`, `host_setting` and
    # `mavbench_apps_base_dir` come from module scope — confirm they are
    # initialized before main() is called.
    host_base_dir = get_host_base()
    try:
        experiment_setting_list = data_clct_conf_obj.get_config_data(
        )["experiment_setting_list"]
        total_run_ctr = 0       # runs across all experiment sets
        experiment_set_ctr = 0  # index of the current experiment set
        #write_to_stats_file(stat_file_addr, "{", companion_setting, ssh_client)
        #--- removing the file that triggers the char (only usefull for follow_the_leader)
        """
        try:
            os.remove(companion_setting["AirSim_dir"]+ "\\"+ "companion_comp_msgs.txt")
        except:
            print "companion_com_msg doesn't exist to remove. This might be ok"
        """
        time.sleep(
            3)  # there needs to be a sleep between restart and change_level
        start_unreal(host_setting, host_base_dir)
        for experiment_setting in experiment_setting_list:
            num_of_runs = experiment_setting["number_of_runs"]
            application = experiment_setting["application"]
            ros_params = experiment_setting["ros_params"]
            proc_freq = experiment_setting["processor_frequency"]
            stat_file_addr = mavbench_apps_base_dir + "/data/" + application + "/" + "stats.json"
            # Load the requested map, or just restart the simulator.
            if ("map_name" in experiment_setting.keys()):
                change_level(experiment_setting["map_name"])
            else:
                restart_unreal()
            ssh_client = creat_ssh_client(companion_setting, host_base_dir)
            mk_data_dir(ssh_client)
            modify_freq(proc_freq, ssh_client)
            #--- preparing the result file: open this set's JSON array.
            # The '\\"' sequences write escaped quotes into the remote file.
            write_to_stats_file(
                stat_file_addr,
                '\t\\"experiment_set_' + str(experiment_set_ctr) + '\\":',
                companion_setting, ssh_client)
            write_to_stats_file(stat_file_addr, '[', companion_setting,
                                ssh_client)
            experiment_set_ctr += 1
            #minimize_the_window()
            #--- start collecting data: one pass per configured run.
            for experiment_run_ctr in range(0, num_of_runs):
                total_run_ctr += 1
                result = schedule_tasks(companion_setting, experiment_setting,
                                        ssh_client, host_base_dir)
                #restart_unreal()
                time.sleep(
                    3
                )  # there needs to be a sleep between restart and change_level
                # Append this run's metadata fields to the stats file.
                write_to_stats_file(
                    stat_file_addr,
                    '\t' + '\\"app\\":\\"' + str(application) + '\\",',
                    companion_setting, ssh_client)
                write_to_stats_file(
                    stat_file_addr,
                    '\t' + '\\"processor_freq\\":\\"' + str(proc_freq) +
                    '\\",', companion_setting, ssh_client)
                for param in ros_params.keys():
                    write_to_stats_file(
                        stat_file_addr, '\t\\"' + param + '\\":\\"' +
                        str(ros_params[param]) + '\\",', companion_setting,
                        ssh_client)
                write_to_stats_file(
                    stat_file_addr,
                    '\t\\"experiment_number\\":' + str(total_run_ctr),
                    companion_setting, ssh_client)
                # Close this run's object; a comma separates all but the last.
                if (experiment_run_ctr < num_of_runs - 1):
                    write_to_stats_file(stat_file_addr, "},",
                                        companion_setting, ssh_client)
            # Close the final run object and this set's array.
            write_to_stats_file(stat_file_addr, "}],", companion_setting,
                                ssh_client)
            #write_to_stats_file(stat_file_addr, '\\"experiment_number\\":'+str(experiment_run_ctr)+"}", companion_setting, ssh_client)
            #write_to_stats_file(stat_file_addr, "]}", companion_setting, ssh_client)
        stop_unreal()
    except Exception as e:
        # NOTE(review): the `pass` is dead code kept from the original; the
        # print below emits the raw format_exception list.
        pass
        print(traceback.format_exception(*sys.exc_info()))
description=p["content"], categories=('Openparlamento',), mt_keywords=', '.join(tags), dateCreated=p['created_at'], publish=True) # estrazione commenti per il post comments_sql = "select * from sf_blog_comment where sf_blog_post_id=%s and is_moderated=0 order by created_at asc" p_cursor.execute(comments_sql, (p['id'])) c_result_set = p_cursor.fetchall() print " --------" print " Commenti" print " --------" for c in c_result_set: print " %s, %s, %s (%s)" % (c["author_name"], c["author_email"], c["created_at"], c['sf_blog_post_id']) try: comment_id = blog.newComment(post_id, content=c['content'], author=c['author_name'], author_email=c['author_email']) blog.editComment(comment_id, status='approve', date_created_gmt=c['created_at']) except: print "Unexpected error:", sys.exc_info()[0] # to remove throttle filtering (temprarily) # http://wordpress.org/support/topic/slow-down-cowboy-throttling-problems print " " cursor.close()
def sign(json_parsed):
    """Main entry point for the signing workflow of the files passed in.

    :param json_parsed: the JSON containing the parameters and the files
        to sign (see the structure comment below)
    :type json_parsed: str
    :return: a list with the overall signing status and the per-file result
        list, or an error message structure on failure
    :rtype: List[SignStatus, json]
    """
    ###################################
    # JSON structure:
    # {
    #   user_id: codice_fiscale  // "X"*15 to skip the check,
    #   masterDocumentId: masterDocumentId,
    #   file_list: [
    #     {
    #       file: file_path,
    #       file_id: id,
    #       file_mime_type: file_mime_type,
    #       file_name: file_name,
    #       file_type: file_type (e.g. DocumentoPicoNuovoPU),
    #       file_data: {additional file data (just a bag)},
    #       signed_file_type: p7m|pdf
    #       "destination": destination_path,
    #       "sig_attributes": {
    #         "visibility": visibility,
    #         "position": {
    #           "page": 'n',
    #           "width": 200.0,
    #           "height": 60.0,
    #           "padding_width": 75.0,
    #           "padding_height": 670.0,
    #           "signature_name": "Signature"
    #         },
    #         "p7m_sig_type": p7m_sig_type,
    #         "text_template": "",
    #       }
    #     },
    #     ...
    #   ],
    #   test_mode: true|false,
    #   update_checker_url: [optional],
    #   revocation_checker_url: [optional],
    #   uploader: "http://localhost:8095/",
    #   params: {
    #     azienda: codice_azienda,
    #     serverIdentifier: serverIdentifier,
    #     resultChannel: resultChannel,
    #     endSignManagerUrl: end-of-signing servlet
    #   }
    # }
    ###################################
    # ---- request validation ----
    if "user_id" not in json_parsed:
        error_message = "missing user_id field"
        return error_response(SignStatus.ERROR.value, error_message)
    user_id = json_parsed["user_id"]
    if "file_list" not in json_parsed:
        error_message = "missing file_list field"
        return error_response(SignStatus.ERROR.value, error_message)
    file_list = json_parsed["file_list"]
    if not isinstance(file_list, (list, )) or len(file_list) < 1:
        error_message = "Empty file_list"
        return error_response(SignStatus.ERROR.value, error_message)
    for json_file in file_list:
        if "file" not in json_file:
            error_message = "missing file field"
            return error_response(SignStatus.ERROR.value, error_message)
        if "signed_file_type" not in json_file:
            error_message = "missing signed_file_type field"
            return error_response(SignStatus.ERROR.value, error_message)
        sig_type = json_file["signed_file_type"]
        if "sig_attributes" not in json_file:
            error_message = "missing sig_attributes field"
            return error_response(SignStatus.ERROR.value, error_message)
        if not allowed_signature(sig_type):
            error_message = f"{sig_type} not allowed in signed_file_type field"
            return error_response(SignStatus.ERROR.value, error_message)
    if "uploader" not in json_parsed:
        error_message = "missing output_path field"
        return error_response(SignStatus.ERROR.value, error_message)
    path_for_signed_files = json_parsed["uploader"]
    # Destination may be an upload URL or a local directory.
    output_to_url = False
    if validators.url(path_for_signed_files):
        output_to_url = True
    else:
        if not path.exists(path_for_signed_files) or not path.isdir(
                path_for_signed_files):
            error_message = f"{path_for_signed_files} field is not a valid directory"
            return error_response(SignStatus.ERROR.value, error_message)
    # folder cleanup
    for _file in listdir(SIGNED_FOLDER):
        remove(path.join(SIGNED_FOLDER, _file))
    # getting params
    params = {}
    if "params" in json_parsed:
        params = json_parsed["params"]
    # getting revocation server url
    if "revocation_checker_url" not in json_parsed:
        error_message = "revocation_checker_url not found. Can't procede to revocation check"
        log.error(error_message)
        clear_session(user_id)
        return error_response(SignStatus.ERROR.value, error_message)
    revocation_checker_url = json_parsed["revocation_checker_url"]
    # checking for test mode
    test_mode = json_parsed["test_mode"]
    if not test_mode:
        # ---- smart-card discovery ----
        sessions = None
        try:
            sessions = DigiSignLib().get_smart_cards_sessions()
        except Exception as err:
            _, value, tb = sys.exc_info()
            log.error(value)
            log.error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
            clear_session(user_id)
            return error_response(
                SignStatus.ERROR.value,
                "Controllare che la smart card sia inserita correttamente")
        # ---- PIN login loop: retry until success or explicit abort ----
        while True:
            try:
                get_pin(user_id)
                if user_session[user_id]["pin"] == "":
                    raise ValueError("pin not valid")
                session = DigiSignLib().session_login(
                    sessions, user_session[user_id]["pin"])
                break
            except Exception as err:
                _, value, tb = sys.exc_info()
                log.error(value)
                log.error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
                clear_session(user_id)
                if str(err) == 'aborted':
                    return error_response(SignStatus.ABORTED.value,
                                          "Operazione annullata")
                show_warning_message(
                    "Controllare che il pin sia valido e corretto")
        # fetching certificate value
        try:
            certificate = DigiSignLib().get_certificate(session)
            certificate_value = DigiSignLib().get_certificate_value(
                session, certificate)
        except Exception as err:
            _, value, tb = sys.exc_info()
            log.error(value)
            log.error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
            clear_session(user_id)
            return error_response(
                SignStatus.ERROR.value,
                "Impossibile ottenere il certificato di firma")
        # check for certificate status, time validity and behaviours
        try:
            rev_serv_resp = get_certificate_status(revocation_checker_url,
                                                   user_id, certificate_value,
                                                   params)
        except Exception as err:
            _, value, tb = sys.exc_info()
            log.error(value)
            log.error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
            clear_session(user_id)
            return error_response(
                SignStatus.ERROR.value,
                "Impossibile verificare il certificato di firma")
        log.info(f"revocation check results: {rev_serv_resp}")
        status = rev_serv_resp["status"]
        timestamp = rev_serv_resp["timestamp"]
        check_cf = rev_serv_resp["check_cf"]
        # Behaviour flags are optional; default everything to "N" when the
        # revocation server does not provide them.
        try:
            behaviours = rev_serv_resp["behaviours"]
            block_if_expired = behaviours["block_if_expired"]
            warn_if_expired = behaviours["warn_if_expired"]
            block_on_untrusted = behaviours["block_on_untrusted"]
            message_on_untrusted = behaviours["message_on_untrusted"]
        except:
            block_if_expired = "N"
            warn_if_expired = "N"
            block_on_untrusted = "N"
            message_on_untrusted = "N"
            pass
        try:
            DigiSignLib.check_certificate_time_validity(
                status, block_if_expired, warn_if_expired)
        except Exception as err:
            _, value, tb = sys.exc_info()
            log.error(value)
            log.error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
            clear_session(user_id)
            return error_response(
                SignStatus.ERROR.value,
                "Impossibile procedere, certificato scaduto!")
        # handle certificate status
        if user_session[user_id]["status"] == REVOKED:
            return error_response(
                SignStatus.ERROR.value,
                "Certificato di firma revocato! Impossibile procedere")
        if user_session[user_id]["status"] == UNKNOWN:
            if block_on_untrusted is True:
                return error_response(
                    SignStatus.ERROR.value,
                    "Il certificato risulta in stato sconosciuto, "
                    "impossibile procedere")
            # Unknown-but-allowed: ask the user once and cache the answer.
            if message_on_untrusted is True:
                if "continue" not in user_session[user_id]:
                    choice = {}
                    _unknown_certificate_choice(choice)
                    if "continue" not in choice:
                        return error_response(
                            SignStatus.ABORTED.value,
                            "La firma è stata interrorra dall'utente")
                    user_session[user_id]["continue"] = choice["continue"]
                if not user_session[user_id]["continue"]:
                    return error_response(
                        SignStatus.ABORTED.value,
                        "L'utente ha deciso di non procedere alla firma "
                        "a causa dello stato sconosciuto del certificato")
    else:
        log.info("test mode enabled, revocation check skipped")
    # ---- loop on given files ----
    signature_status = {}
    error_count = 0
    signed_count = 0
    signed_files_list = []
    for _index, file_to_sign in enumerate(file_list):
        # taking parameters
        file_id = file_to_sign["file_id"]
        file_name = file_to_sign["file_name"]
        file_type = file_to_sign["file_type"]
        file_mime_type = file_to_sign["file_mime_type"]
        file_data = file_to_sign["file_data"]
        signature_type = file_to_sign["signed_file_type"]
        file_path_to_sign = file_to_sign["file"]
        sig_attributes = file_to_sign["sig_attributes"]
        destination = file_to_sign["destination"]
        # initialize the response entry for this file
        output_item = {
            "file": file_path_to_sign,
            "file_id": file_id,
            "file_name": file_name,
            "file_type": file_type,
            "file_mime_type": file_mime_type,
            "file_data": file_data,
            "signed_file_type": signature_type,
            "sig_attributes": sig_attributes,
            "destination": destination,
            "signed": "",
            "signed_file": ""
        }
        signed_files_list.append(output_item)
        # handle url file paths: fetch remote inputs to a local temp path
        if validators.url(file_path_to_sign):
            try:
                local_file_path = download_file(file_path_to_sign)
            except:
                log.error(f"Impossibile reperire il file: {file_path_to_sign}")
                _, value, tb = sys.exc_info()
                log.error(value)
                log.error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
                signed_files_list[_index]["signed"] = "no"
                error_count += 1
                signature_status[SignStatus.ERROR] = error_count
                continue
        else:
            local_file_path = file_path_to_sign
        log.info("LOCAL PATH = %s", local_file_path)
        temp_file_path = ""
        if not test_mode:
            try:
                if signature_type == P7M:
                    # p7m signature
                    temp_file_path = DigiSignLib().sign_p7m(
                        local_file_path, session, user_id, sig_attributes,
                        timestamp, check_cf)
                elif signature_type == PDF:
                    # pdf signature (only for actual PDFs)
                    mime = mimetypes.MimeTypes().guess_type(local_file_path)[0]
                    if mime == 'application/pdf':
                        temp_file_path = DigiSignLib().sign_pdf(
                            local_file_path, session, certificate,
                            certificate_value, user_id, sig_attributes,
                            timestamp, check_cf)
                    else:
                        log.info(
                            f"the file {local_file_path} is not a pdf will be ignored"
                        )
                        signed_files_list[_index]["signed"] = "no"
                        error_count += 1
                        signature_status[SignStatus.ERROR] = error_count
                        continue
                signed_files_list[_index]["signed"] = "yes"
                signed_count += 1
                signature_status[SignStatus.SIGNED] = signed_count
            except CertificateOwnerException as err:
                # Card owner mismatch is fatal for the whole batch.
                user_tip = "Codice fiscale dell'utente non corrispondente a quello della smart card. " \
                           "Impossibile procedere."
                DigiSignLib().session_logout(session)
                DigiSignLib().session_close(session)
                return error_response(SignStatus.ERROR.value, user_tip)
            except CertificateValidityError as err:
                user_tip = "Certificato non valido temporalmente"
                DigiSignLib().session_logout(session)
                DigiSignLib().session_close(session)
                return error_response(SignStatus.ERROR.value, user_tip)
            except PdfNotDeserializable as err:
                log.error(err)
                user_tip = "Non è possibile firmare il file %s perché ha delle informazioni che lo rendono " \
                           "incompatibile con FirmaJR. Firmare il file esternamente." % file_name
                DigiSignLib().session_logout(session)
                DigiSignLib().session_close(session)
                return error_response(SignStatus.ERROR.value, user_tip)
            except:
                # Any other per-file failure: record and move on.
                _, value, tb = sys.exc_info()
                log.error(value)
                log.error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
                signed_files_list[_index]["signed"] = "no"
                error_count += 1
                signature_status[SignStatus.ERROR] = error_count
                continue
        else:
            log.info("test mode enabled, signing skipped")
            temp_file_path = local_file_path
            signed_files_list[_index]["signed"] = "yes"
            signed_count += 1
            signature_status[SignStatus.SIGNED] = signed_count
        # moving signed file to given destination
        if output_to_url:
            log.info("moving signed file to given destination: %s",
                     path_for_signed_files)
            with open(temp_file_path, "rb") as _file:
                files = {'upload-file': _file}
                data = {
                    'destination': destination,
                    'params': json.dumps(params)
                }
                try:
                    log.info(path_for_signed_files)
                    # Url uploader
                    res = post(path_for_signed_files, files=files, data=data)
                except:
                    _, value, tb = sys.exc_info()
                    log.error(value)
                    log.error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
                    signed_files_list[_index]["signed_file"] = "EXCEPTION!!"
                    error_count += 1
                    signature_status[SignStatus.ERROR] = error_count
                    continue
                if res.status_code != 200:
                    error_message = res.json()["error_message"]
                    log.error(error_message)
                    signed_files_list[_index]["signed_file"] = "ERROR!!"
                    error_count += 1
                    signature_status[SignStatus.ERROR] = error_count
                    continue
                else:
                    log.info("file signed and uploaded")
                    signed_files_list[_index]["signed"] = "yes - [remote]"
                    signed_files_list[_index]["signed_file"] = f"{res.text}"
                    signed_count += 1
                    signature_status[SignStatus.SIGNED] = signed_count
                    continue
        else:
            # Local destination: move the temp output into place.
            temp_file_name = path.basename(temp_file_path)
            signed_file_path = path.join(path_for_signed_files,
                                         temp_file_name)
            try:
                move(temp_file_path, signed_file_path)
                signed_files_list[_index]["signed_file"] = signed_file_path
            except:
                _, value, tb = sys.exc_info()
                log.error(value)
                log.error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
                signed_files_list[_index]["signed_file"] = "LOST"
                error_count += 1
                signature_status[SignStatus.ERROR] = error_count
                continue
    # Folder cleanup
    for _file in listdir(UPLOAD_FOLDER):
        remove(path.join(UPLOAD_FOLDER, _file))
    # logout (best effort)
    if not test_mode:
        try:
            DigiSignLib().session_logout(session)
        except:
            log.error("logout failed")
        # session close (best effort)
        try:
            DigiSignLib().session_close(session)
        except:
            log.error("session close failed")
    ###################################
    # response JSON structure:
    # { signed_file_list: [
    #     {
    #       file_to_sign: ***,
    #       signed: yes|no,
    #       signed_file: ***
    #     },
    #     ...
    # ]}
    ###################################
    # Overall status: SIGNED, PARTIALLY_SIGNED (some errors) or ERROR.
    if SignStatus.SIGNED in signature_status:
        if SignStatus.ERROR in signature_status:
            status = SignStatus.PARTIALLY_SIGNED.value
        else:
            status = SignStatus.SIGNED.value
    else:
        status = SignStatus.ERROR.value
    res = [status, signed_files_list]
    return res
def run(self):
    """Execute the wrapped callable, recording its outcome on ``self``.

    ``function``, ``args`` and ``kw`` come from the enclosing closure.
    A successful call stores the return value in ``self.result``; any
    failure stores only the exception *type* in ``self.error``.
    """
    try:
        outcome = function(*args, **kw)
    except BaseException:
        # Equivalent to the classic bare ``except:`` — even SystemExit /
        # KeyboardInterrupt are recorded rather than propagated.
        self.error = sys.exc_info()[0]
    else:
        self.result = outcome
def sign():
    ###################################
    # request JSON structure:
    # {
    #   user_id: codice_fiscale   // "X"*15 to skip check
    #   file_list: [
    #     {
    #       file: file_path,
    #       signed_file_type: p7m|pdf
    #     },                      # TODO: tidy up this structure
    #     {
    #       file: file_path,
    #       signed_file_type: p7m|pdf
    #     },
    #     ...
    #   ],
    #   output_path: output_folder_path
    # }
    ###################################
    # Flask endpoint: digitally sign every file in the request using the
    # smart card connected to this host, then deliver the signed files
    # either to a local directory or (when output_path is an http URL) to
    # a remote uploader via POST.
    MyLogger().my_logger().info("/api/sign request")
    # check for well formed request JSON
    invalid_json_request = "Richiesta al server non valida, contatta l'amministratore di sistema"
    if not request.json:
        error_message = "Missing json request structure"
        return error_response_maker(error_message, invalid_json_request, 404)
    if not "user_id" in request.json:
        error_message = "missing user_id field"
        return error_response_maker(error_message, invalid_json_request, 404)
    user_id = request.json["user_id"]
    if not "file_list" in request.json:
        error_message = "missing file_list field"
        return error_response_maker(error_message, invalid_json_request, 404)
    file_list = request.json["file_list"]
    if not isinstance(file_list, (list, )) or len(file_list) < 1:
        error_message = "Empty file_list"
        return error_response_maker(error_message, invalid_json_request, 404)
    # Per-file validation: any structurally bad entry aborts the whole
    # request with a 404 before signing starts.
    for json_file in file_list:
        if not "file" in json_file:
            error_message = "missing file field"
            return error_response_maker(error_message, invalid_json_request, 404)
        if not "signed_file_type" in json_file:
            error_message = "missing signed_file_type field"
            return error_response_maker(error_message, invalid_json_request, 404)
        sig_type = json_file["signed_file_type"]
        if not allowed_signature(sig_type):
            error_message = f"{sig_type} not allowed in signed_file_type field"
            return error_response_maker(error_message, invalid_json_request, 404)
        if "sig_attributes" in json_file:
            sig_attributes = json_file["sig_attributes"]
        else:
            # NOTE(review): when sig_attributes is missing, this only logs
            # and continues — `sig_attributes` then keeps the value from a
            # previous iteration (or is unbound on the first one) when the
            # signing loop below uses it.  Looks like a latent bug; confirm
            # intended behavior with the caller.
            MyLogger().my_logger().error(
                f"missing sig_attributes field for file {json_file['file']}")
            continue
        # TODO: validate the remaining fields
    if not "output_path" in request.json:
        error_message = "missing output_path field"
        return error_response_maker(error_message, invalid_json_request, 404)
    path_for_signed_files = request.json["output_path"]
    # output_to_url selects remote upload (http) vs local move (directory).
    output_to_url = False
    if path_for_signed_files.startswith("http://"):
        output_to_url = True
    else:
        if not path.exists(path_for_signed_files) or not path.isdir(
                path_for_signed_files):
            error_message = f"{path_for_signed_files} field is not a valid directory"
            return error_response_maker(error_message, invalid_json_request, 404)
    # folder cleanup (leftovers from previous runs)
    for _file in listdir(SIGNED_FOLDER):
        remove(path.join(SIGNED_FOLDER, _file))
    # getting smart cards connected
    try:
        sessions = DigiSignLib().get_smart_cards_sessions()
    except Exception as err:
        _, value, tb = sys.exc_info()
        MyLogger().my_logger().error(value)
        MyLogger().my_logger().error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
        # Drop the cached PIN so the user is re-prompted next time.
        clear_pin(user_id)
        return error_response_maker(
            str(err),
            "Controllare che la smart card sia inserita correttamente",
            500)
    # attempt to login
    try:
        get_pin(user_id)
        session = DigiSignLib().session_login(sessions, memorized_pin[user_id]["pin"])
    except Exception as err:
        _, value, tb = sys.exc_info()
        MyLogger().my_logger().error(value)
        MyLogger().my_logger().error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
        clear_pin(user_id)
        return error_response_maker(
            str(err),
            "Controllare che il pin sia valido e corretto",
            500)
    # loop on given files; per-file failures mark the entry and continue,
    # they do not abort the batch.
    signed_files_list = []
    for index, file_to_sign in enumerate(file_list):
        # already checked by the validation loop above
        signature_type = file_to_sign["signed_file_type"]
        file_path_to_sign = file_to_sign["file"]
        # initialize response structure
        output_item = {
            "file_to_sign": file_path_to_sign,
            "signed": "",
            "signed_file": ""
        }
        signed_files_list.append(output_item)
        # handle url file paths: download remote inputs to a local temp file
        if file_path_to_sign.startswith("http://"):
            try:
                # NOTE(review): `downoad_file` spelling matches the project
                # helper's name (sic).
                local_file_path = downoad_file(file_path_to_sign)
            except:
                MyLogger().my_logger().error(
                    f"Impossibile reperire il file: {file_path_to_sign}")
                _, value, tb = sys.exc_info()
                MyLogger().my_logger().error(value)
                MyLogger().my_logger().error('\n\t'.join(
                    f"{i}" for i in extract_tb(tb)))
                signed_files_list[index]["signed"] = "no"
                continue
        else:
            local_file_path = file_path_to_sign
        try:
            if signature_type == P7M:
                # p7m signature
                temp_file_path = DigiSignLib().sign_p7m(
                    local_file_path, session, user_id, sig_attributes)
            elif signature_type == PDF:
                # pdf signature — only files whose guessed MIME type really
                # is application/pdf are signed
                mime = MimeTypes().guess_type(local_file_path)[0]
                if mime == 'application/pdf':
                    temp_file_path = DigiSignLib().sign_pdf(
                        local_file_path, session, user_id, sig_attributes)
                else:
                    MyLogger().my_logger().info(
                        f"the file {local_file_path} is not a pdf will be ignored"
                    )
                    signed_files_list[index]["signed"] = "no"
                    continue
            signed_files_list[index]["signed"] = "yes"
        except CertificateOwnerException as err:
            # The smart card belongs to someone else: abort the whole batch.
            user_tip = "Codice fiscale dell'utente non corrispondente a quello della smart card. Impossibile procedere."
            DigiSignLib().session_logout(session)
            DigiSignLib().session_close(session)
            return error_response_maker(str(err), user_tip, 500)
        except:
            _, value, tb = sys.exc_info()
            MyLogger().my_logger().error(value)
            MyLogger().my_logger().error('\n\t'.join(f"{i}" for i in extract_tb(tb)))
            signed_files_list[index]["signed"] = "no"
            continue
        # moving signed file to given destination
        if output_to_url:
            # Remote destination: POST the signed bytes to the uploader.
            with open(temp_file_path, "rb") as _file:
                files = {'file': _file}
                try:
                    MyLogger().my_logger().info(path_for_signed_files)
                    res = post(path_for_signed_files, files=files)
                except:
                    _, value, tb = sys.exc_info()
                    MyLogger().my_logger().error(value)
                    MyLogger().my_logger().error('\n\t'.join(
                        f"{i}" for i in extract_tb(tb)))
                    signed_files_list[index]["signed_file"] = "EXCEPTION!!"
                    continue
            if res.status_code != 200:
                error_message = res.json()["error_message"]
                MyLogger().my_logger().error(error_message)
                signed_files_list[index]["signed_file"] = "ERROR!!"
                continue
            else:
                signed_files_list[index]["signed"] = "yes - [remote]"
                uploaded_path = res.json()["Ok"]
                signed_files_list[index][
                    "signed_file"] = f"{uploaded_path}"
                continue
        else:
            # Local destination: move the temp file into the output folder.
            temp_file_name = path.basename(temp_file_path)
            signed_file_path = path.join(path_for_signed_files,
                                         temp_file_name)
            try:
                move(temp_file_path, signed_file_path)
                signed_files_list[index]["signed_file"] = signed_file_path
            except:
                _, value, tb = sys.exc_info()
                MyLogger().my_logger().error(value)
                MyLogger().my_logger().error('\n\t'.join(
                    f"{i}" for i in extract_tb(tb)))
                signed_files_list[index]["signed_file"] = "LOST"
                continue
    # Folder cleanup
    for _file in listdir(UPLOAD_FOLDER):
        remove(path.join(UPLOAD_FOLDER, _file))
    # logout (best effort — failures are only logged)
    try:
        DigiSignLib().session_logout(session)
    except:
        MyLogger().my_logger().error("logout failed")
    # session close (best effort)
    try:
        DigiSignLib().session_close(session)
    except:
        MyLogger().my_logger().error("session close failed")
    ###################################
    # response JSON structure:
    # { signed_file_list: [
    #     {
    #       file_to_sign: ***,
    #       signed: yes|no,
    #       signed_file: ***
    #     },
    #     {
    #       file_to_sign: ***,
    #       signed: yes|no,
    #       signed_file: ***
    #     },
    #     ...
    # ]}
    ###################################
    res = make_response(jsonify({"signed_file_list": signed_files_list}))
    return res
def main():
    # Python 2 experiment driver: reads a data-collection config, then for
    # every (experiment, core-count, frequency) combination restarts the
    # simulator, reconfigures the companion computer over SSH, runs the
    # experiment N times, and appends JSON-ish stats via write_to_stats_file.
    try:
        data_clct_conf_obj = DataClctConf(data_clct_conf_file_addr)  #parse config file and instantiate a
        user_setting = data_clct_conf_obj.get_config_data()["user_setting"]
        experiment_setting_list = data_clct_conf_obj.get_config_data()["experiment_setting_list"]
        total_run_ctr = 0
        experiment_set_ctr = 0
        #write_to_stats_file(stat_file_addr, "{", user_setting, ssh_client)
        #--- removing the file that triggers the char (only usefull for follow_the_leader)
        """
        try:
            os.remove(user_setting["AirSim_dir"]+ "\\"+ "companion_comp_msgs.txt")
        except:
            print "companion_com_msg doesn't exist to remove. This might be ok"
        """
        time.sleep(3)  #there needs to be a sleep between restart and change_level
        # Sweep: each experiment x {4,3,2} cores x three CPU frequencies (Hz).
        for experiment_setting in experiment_setting_list:
            for n_core in [4,3,2] :
                for freq in [2035200, 1574400, 806400]:
                    num_of_runs = experiment_setting["number_of_runs"]
                    application = experiment_setting["application"]
                    ros_params = experiment_setting["ros_params"]
                    # mapping/sar scale their max velocity with compute budget
                    if (application == "mapping" or application == "sar"):
                        ros_params["v_max"] = get_v_max(n_core, freq)
                    #proc_freq = experiment_setting["processor_frequency"]
                    proc_freq = freq
                    num_of_core = n_core
                    stat_file_addr = user_setting["mav_bench_dir"]+"data/"+application+ "/"+"stats.json"
                    # best-effort removal of the stale trigger file
                    try:
                        os.remove(user_setting["AirSim_dir"]+ "\\"+ "companion_comp_msgs.txt")
                    except:
                        print "companion_com_msg doesn't exist to remove. This might be ok"
                    # load the requested map, or just restart the sim
                    if ("map_name" in experiment_setting.keys()):
                        change_level(experiment_setting["map_name"])
                    else:
                        restart_unreal()
                    #start_unreal(user_setting)
                    ssh_client = creat_ssh_client(user_setting)
                    modify_freq(proc_freq, ssh_client, num_of_core)
                    #--- preparting the result file
                    write_to_stats_file(stat_file_addr, '\t\\"experiment_set_'+ str(experiment_set_ctr) +'\\":', user_setting, ssh_client)
                    write_to_stats_file(stat_file_addr, '[', user_setting, ssh_client)
                    experiment_set_ctr +=1
                    #minimize_the_window()
                    #--- start collecting data
                    for experiment_run_ctr in range(0, num_of_runs):
                        total_run_ctr += 1
                        # NOTE(review): `result` is never inspected — runs
                        # are recorded regardless of outcome; confirm
                        # schedule_tasks writes its own per-run stats.
                        result = schedule_tasks(user_setting, experiment_setting, ssh_client)
                        try:
                            os.remove(user_setting["AirSim_dir"]+ "\\"+ "companion_comp_msgs.txt")
                        except:
                            print "companion_com_msg doesn't exist to remove. This might be ok"
                        restart_unreal()
                        time.sleep(3)  #there needs to be a sleep between restart and change_level
                        # Append the run's metadata as escaped JSON fragments.
                        write_to_stats_file(stat_file_addr, '\t'+'\\"app\\":\\"'+str(application)+'\\",', user_setting, ssh_client)
                        write_to_stats_file(stat_file_addr, '\t'+'\\"processor_freq\\":\\"'+str(proc_freq)+'\\",', user_setting, ssh_client)
                        for param in ros_params.keys():
                            write_to_stats_file(stat_file_addr, '\t\\"'+param + '\\":\\"'+str(ros_params[param])+'\\",', user_setting, ssh_client)
                        write_to_stats_file(stat_file_addr, '\t\\"experiment_number\\":'+str(total_run_ctr)+',', user_setting, ssh_client)
                        write_to_stats_file(stat_file_addr, '\t\\"num_of_cores\\":'+str(num_of_core), user_setting, ssh_client)
                        # close this run's object unless it is the last one
                        if (experiment_run_ctr < num_of_runs - 1):
                            write_to_stats_file(stat_file_addr, "},", user_setting, ssh_client)
                    # close the experiment-set array
                    write_to_stats_file(stat_file_addr, "}],", user_setting, ssh_client)
        #stop_unreal()
        #write_to_stats_file(stat_file_addr, '\\"experiment_number\\":'+str(experiment_run_ctr)+"}", user_setting, ssh_client)
        #write_to_stats_file(stat_file_addr, "]}", user_setting, ssh_client)
    except Exception as e:
        # NOTE(review): `pass` is dead code; the traceback below is the
        # only error handling — the sweep is abandoned on first failure.
        pass
        print(traceback.format_exception(*sys.exc_info()))
async def worker(qu, coro_num, session, engine):
    # Crawler coroutine: drains URLs from `qu`.  Category pages enqueue
    # article links; article pages are scraped, machine-translated ru->uk
    # in a thread-pool executor, and inserted into the DB via `engine`.
    # Shares module-level state: proxies_list, user_agents, articles (set).
    loop = asyncio.get_running_loop()
    while True:
        # Stop when the queue is drained (qsize checked, not join()).
        if qu.qsize() == 0:
            break
        url = await qu.get()
        try:
            prox = random.choice(proxies_list)
            # NOTE(review): `proxies` is built but never passed to
            # session.get below, so requests go out direct — confirm
            # whether proxying was intended here.
            proxies = {'http': prox, 'https': prox}
            headers = {'User-Agent': random.choice(user_agents)}
            print(f'[Send request in {coro_num}] [queue_size {qu.qsize()}]', url)
            response = await session.get(url, headers=headers, timeout=10)
            if '/category/' in url:
                # Category listing: harvest article links and enqueue the
                # unseen ones; `articles` doubles as the dedupe set.
                post_urls = response.html.xpath('//h3/a/@href')
                for u in post_urls:
                    if u.endswith('.html'):
                        if u not in articles:
                            await qu.put(u)
                            articles.add(u)
                continue
            # Article page: build the row to insert.
            post = {}
            name = response.html.xpath('//h1/text()')[0]
            post['name'] = await loop.run_in_executor(None, translate_text,
                                                      name, 'ru', 'uk')
            post['slug'] = slugify(post['name'])
            post['source'] = url
            post['category'] = response.html.xpath(
                '//ul[@class="td-category"]/li/a/text()')
            post['category'] = ','.join(post['category'])
            post['image'] = response.html.xpath(
                '//div[@class="td-post-featured-image"]//img/@src')[0]
            elements = response.html.xpath('//p')
            post['content'] = ''
            post['parsed_time'] = datetime.now().date()
            # Translate each paragraph off the event loop thread.
            for elem in elements:
                translated = await loop.run_in_executor(
                    None, translate_text, elem.text, 'ru', 'uk')
                post['content'] += f'<p>{translated}</p>\n'
                del translated
            async with engine.acquire() as cursor:
                sql = Article.insert().values(**post)
                await cursor.execute(sql)
                print('[Article saved]', post["name"])
            # Explicitly drop per-iteration objects before the next URL.
            del url, prox, proxies, headers, response, post, sql
        except (ConnectionError, ReadTimeout):
            # Transient network failure: requeue for another attempt.
            await qu.put(url)
        except KeyboardInterrupt:
            quit()
        except Exception as e:
            print(e, type(e), sys.exc_info()[2].tb_lineno)
def initial_cleanup(df, replace=False):
    """First-pass cleanup of a raw Steam app-details DataFrame.

    Drops presentation-only columns, normalises numeric/datetime types,
    filters to the app types we keep (game/dlc/demo), flattens nested
    dict/list columns into scalar columns, converts media columns to
    counts, splits genres/categories into boolean side tables, and
    finally reconciles the frame against the ``raw_data_dtype`` schema.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw app-details rows; expected to contain a ``steam_appid`` column.
    replace : bool, optional
        Currently unused; kept for interface compatibility.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame (possibly empty when no relevant types remain).
    """
    del_cols = ['success', 'detailed_description', 'about_the_game',
                'header_image', 'pc_requirements', 'mac_requirements',
                'linux_requirements', 'support_info', 'background',
                'legal_notice', 'reviews', 'content_descriptors',
                'packages',        # is unclear if we want/need this
                'package_groups',  # is unclear if we want/need this
                'ext_user_account_notice'
                ]
    num_type_list = ['required_age']
    rename_dict = {
        'name': 'game_name'
    }

    # set steam_appid as index.
    # BUG FIX: DataFrame.set_index returns a *new* frame (it is not
    # in-place by default); the previous code discarded the result, so the
    # index was never actually set.  Rebind locally so the caller's frame
    # is not mutated.
    df = df.set_index('steam_appid')

    # remove columns we don't care about
    print('remove columns we dont care about')
    df_clean = df.drop(columns=del_cols, axis=1, errors='ignore')

    # rename columns as appropriate
    df_clean.rename(columns=rename_dict, inplace=True)

    # update types to numeric
    print('update types to numeric')
    for i in num_type_list:
        df_clean[i] = pd.to_numeric(df_clean[i])

    # update types to datetime
    print('update types to datetime')
    df_clean = convert_to_datetime(df_clean, 'release_date',
                                   {'date': 'release_date'},
                                   ['release_date'])

    # trim down to just below types
    print('trim down to just below types')
    valid_types = ['game', 'dlc', 'demo']
    df_clean = remove_unused_data(df_clean, 'type', valid_types)
    if df_clean.empty:
        print('no relevant types, stop processing batch')
        return df_clean

    # BUG FIX: head is a method — the old code printed the bound method
    # object instead of the first rows.
    print(df_clean.head())
    print("Pre processed Dataframe size: " + str(df_clean.shape))

    # flatten cols as possible — each step is best-effort: a failure is
    # logged and the remaining steps still run.
    print('flatten cols as possible')
    flatten_steps = [
        ('fullgame', lambda d: flatten_field(
            d, 'fullgame', {'appid': 'fullgame_appid'}, ['name', 'fullgame'])),
        ('price', flatten_price),
        ('platform', flatten_platform),
        ('recommendations', lambda d: flatten_field(
            d, 'recommendations', {'total': 'recommendations'},
            ['recommendations'])),
        ('metacritic', lambda d: flatten_field(
            d, 'metacritic', {'score': 'metacritic_score'},
            ['metacritic', 'url'])),
        ('achievements', lambda d: flatten_field(
            d, 'achievements', {"total": "achievement_count"},
            ['achievements', 'highlighted'])),
    ]
    for label, step in flatten_steps:
        try:
            df_clean = step(df_clean)
        except Exception:
            print('Error flattening {} from dict columns: {}'.format(
                label, sys.exc_info()[0]))

    # convert col of lists to just the string contents
    # todo: not sure this is really how we want to handle errors!?
    print('developers to string')
    try:
        df_clean = list_to_string(df_clean, 'developers')
    except Exception:
        print('failed to convert developer column, blanking out data')
        df_clean['developers'] = np.nan
    print('publishers to string')
    try:
        df_clean = list_to_string(df_clean, 'publishers')
    except Exception:
        print('failed to convert publishers column, blanking out data')
        df_clean['publishers'] = np.nan

    print('pull demo id')
    # there seems to be only 1 demo in the subset i pulled so we'll just
    # show that one demo id instead of the dict
    try:
        if df_clean['demos'].dropna().empty:
            print('demos column was empty')
            df_clean.rename(columns={'demos': 'demo_appid'}, inplace=True)
        else:
            s = df_clean['demos'].apply(pd.Series)
            s['demo_appid'] = s[0].apply(
                lambda x: str(x['appid']) if not pd.isnull(x) else np.nan)
            df_clean = pd.concat([df_clean, s['demo_appid']], axis=1)
            # drop the original column at the end of processing
            df_clean.drop(axis=1, columns='demos', inplace=True)
    except Exception:
        print('Error with column {}: {}'.format('demos', sys.exc_info()[0]))

    # convert cols to bool type
    try:
        bool_col = 'controller_support'
        controller_mapping = {np.nan: False, 'full': True}
        df_clean[bool_col] = map_to_bool(df_clean, controller_mapping,
                                         bool_col)
    except Exception:
        print('Error with column {}: {}'.format('controller_support',
                                                sys.exc_info()[0]))

    # convert cols to just counts
    print('convert columns to just counts')
    for col, counted_name in [('screenshots', 'screenshot_count'),
                              ('movies', 'movie_count'),
                              ('dlc', 'dlc_count')]:
        try:
            df_clean = replace_with_count(df_clean, col)
            df_clean.rename(columns={col: counted_name}, inplace=True)
        except Exception:
            print('Error with column {}: {}'.format(col, sys.exc_info()[0]))

    # convert lists to bools for easy categorization
    print('convert lists to bools for easy categorization - store in separate tables')
    df_clean = convert_col_to_bool_table(
        df_clean, 'genres', settings.Database_Tables['GENRES_TABLE'])
    df_clean = convert_col_to_bool_table(
        df_clean, 'categories', settings.Database_Tables['CATEGORIES_TABLE'])
    df_clean = df_clean.drop(columns=[0], axis=1, errors='ignore')

    print('*** Dataframe columns***')
    print(df_clean.columns)

    # Reconcile against the expected schema: add any missing columns ...
    for k, v in raw_data_dtype.items():
        if k not in df_clean.columns:
            print(k + ' not in dataframe, adding it')
            df_clean[k] = np.nan

    # ... and drop anything the schema does not know about.
    print('checking for extra columns and dropping them')
    extra = []
    for col in df_clean.columns:
        if col not in raw_data_dtype:
            print(str(col) + ' is extra column that should not exist')
            extra.append(col)
    df_clean = df_clean.drop(columns=extra, axis=1)
    return df_clean
def create_venue_submission():
    """Handle the new-venue form POST: validate, persist, flash, redirect.

    Builds a Venue row (plus its Genre many-to-many links) from the
    submitted form.  On a DB failure the transaction is rolled back and an
    error is flashed; on a validation failure the form errors are flashed.
    The user is always redirected back to the index page.
    """
    form = VenueForm(request.form)
    if not form.validate():
        status = 'Error_form'
    else:
        status = 'No_error'
        try:
            saved = {}

            def optional(field):
                # Treat missing *and* empty form values as NULL.
                return request.form.get(field) or None

            # Checkbox-style field: any non-empty value means True.
            is_seeking = bool(request.form.get('seeking_artist'))

            # Resolve the state abbreviation to its row (aborts with 404
            # if unknown; the bare except below converts that to a DB
            # error flash, matching the previous behavior).
            state_row = State.query.filter_by(
                abbreviation=request.form.get('state')).first_or_404()

            venue = Venue(name=request.form['name'],
                          city=request.form['city'],
                          address=request.form.get('address'),
                          phone=optional('phone'),
                          facebook_link=optional('facebook_link'),
                          website_link=optional('website_link'),
                          seeking_description=optional('seeking_description'),
                          seeking_artist=is_seeking,
                          image_link=optional('image_link'),
                          state_fk=state_row.id)

            # Attach the selected genres (M2M) by name.
            wanted = request.form.getlist('genres')
            venue.genres = Genre.query.filter(Genre.name.in_(wanted)).all()

            # Capture the name as a plain string before the session closes.
            saved['name'] = venue.name
            db.session.add(venue)
            db.session.commit()
        except:
            status = 'Error_db_add'
            db.session.rollback()
            print(sys.exc_info())
        finally:
            db.session.close()

    # error flash messages
    if status == 'No_error':
        flash('Venue ' + saved['name'] + ' was successfully added!',
              'success')
    elif status == 'Error_db_add':
        flash(
            'An error occurred. Venue ' + request.form['name'] +
            ' could not be added.', 'danger')
    elif status == 'Error_form':
        flash_errors(form)
    return redirect(url_for('index'))
def ngrams(toksentence, n=1, njump=0): n_gram = n # var berisi jumlah n pada N-Gram, default 1(Uni-Gram) n_gram_words = [] kata = toksentence # var dengan type list berisi tokenize sentence try: if type(toksentence) != list: raise NGramErr({ "message": "Harap masukkan tokenized sentence!!", "tokenize": type(toksentence) }) else: if len(toksentence) == 0: raise NGramErr({ "message": "Panjang tokenized sentence tidak boleh 0!!", "tokenize": len(toksentence) }) if n_gram > len(kata): print "Len N-Gram: %i => Len Sentence: %i" % (n_gram, len(kata)) raise NGramErr({"message":"Total N-Gram tidak boleh melebihi Total Kata", \ "tokenize":len(toksentence)}) except NGramErr as e: print e.args[0]["message"] return except: print "Unexpected other error:", sys.exc_info()[0] return else: try: ################# Mulai proses mengelompokkan kata kedalam N-Gram ################ for z in range(len(kata)): ngram = [] if z + n_gram - 1 < len( kata): # Jangan sampai loop melebihi total len(kata) for x in range(n_gram): if x > 0: # Untuk mendapat model yang lebih bervariasi, # kita tambahkan njump parameter if z + n_gram - 1 + njump < len(kata): if kata[z + x + njump] == '<s>': ngram.append(kata[z + x + njump + 1]) else: ngram.append(kata[z + x + njump]) else: """ Dirasa masih dibutuhkan modifikasi lagi untuk memaksimalkan sample ketika penerapan jump parameter digunakan """ if kata[0] == '<s>': ngram.append(kata[z + x]) else: ngram.append(kata[z + x]) else: ngram.append(kata[z + x]) # Ada beberapa kasus ketika kita menerapkan jump parameter (biasanya jika jump param cukup besar) # terdapat sample yang tidak masuk ke karakteristik populasi N-Gram yang kita inginkan # karena itu kita harus menyaring ulang seluruh sample populasi #if len(ngram) == n_gram and ngram not in n_gram_words: n_gram_words.append(ngram) ################ Akhir proses mengelompokkan kata kedalam N-Gram ################# except: print "Unexpected other error:", sys.exc_info()[0] return return n_gram_words
def data(self, datatype, params, interval=0):
    # Fan out one query thread per day in [startDay, endDay] (Python 2).
    # Each thread runs self.submit(...) which fills `result` keyed by day;
    # results are returned sorted by day, newest first.
    # params: startDay/endDay ("%Y-%m-%d"), events, windows (int, 1..60),
    #         optional attrs dict.  interval: seconds to sleep between
    #         thread launches (simple rate limiting).
    result = OrderedDict([])
    try:
        # Parse and validate the request parameters.
        tm_str_s = params["startDay"]
        tm_str_e = params["endDay"]
        events = params["events"]
        windows = int(params["windows"])
        start_day = datetime.datetime.strptime(tm_str_s, "%Y-%m-%d")
        end_day = datetime.datetime.strptime(tm_str_e, "%Y-%m-%d")
        attrs = params.get("attrs", {})
        num = (end_day - start_day).days
        # Reject spans over 60 days or with end before start.
        if num > 60 or num < 0:
            return {"errinfo": "日期跨度超出范围!"}
        # Window length must be 1..60 days.
        if windows > 60 or windows <= 0:
            return {"errinfo": "窗口期超出范围!"}
    except:
        import traceback
        exc_type, exc_value, exc_traceback = sys.exc_info()
        errinfo = traceback.format_exception(exc_type, exc_value,
                                             exc_traceback)
        logging.error(json.dumps(errinfo))
        # Parameter error (malformed dates / non-numeric windows / missing keys)
        return {"errinfo": "参数错误!"}
    try:
        threads = []
        # One thread per day; each queries [day, day + windows).
        while start_day <= end_day:
            try:
                query = self.create_query_sql(
                    datatype, start_day.strftime("%Y-%m-%d"),
                    (start_day + datetime.timedelta(days=windows)
                     ).strftime("%Y-%m-%d"), events, attrs=attrs)
                # Normalise the query text to a byte string (Python 2
                # str/unicode juggling).
                try:
                    query = str(query.decode("utf-8"))
                except:
                    query = query.decode("utf-8").encode("utf-8")
            except:
                import traceback
                # NOTE(review): traceback.print_exc() returns None, so this
                # prints "None" after the traceback; also `query` may be
                # stale/unbound below if create_query_sql failed.
                print traceback.print_exc()
            t = threading.Thread(target=self.submit,
                                 args=(datatype, query, result,
                                       start_day.strftime("%Y-%m-%d")))
            t.start()
            threads.append(t)
            time.sleep(interval)
            start_day += datetime.timedelta(days=1)
        # Wait for every per-day query to finish.
        for _thread in threads:
            _thread.join()
    except:
        import traceback
        exc_type, exc_value, exc_traceback = sys.exc_info()
        errinfo = traceback.format_exception(exc_type, exc_value,
                                             exc_traceback)
        logging.error(json.dumps(errinfo))
        # Query error
        return {"errinfo": "查询错误!"}
    # Re-key the results in descending day order (Python 2: keys() is a list).
    result_sort = OrderedDict([])
    days = result.keys()
    days.sort(reverse=True)
    for day in days:
        result_sort.setdefault(day, result[day])
    return result_sort