def create_app(config_name):
    app = Flask(__name__, static_url_path='')
    app.config.from_object(config[config_name])

    # Configurations & Extensions
    config[config_name].init_app(app)
    login_manager.init_app(app)
    db.init_app(app)
    Helper.init_app(app)

    @app.teardown_request
    def shutdown_session(exception=None):
        db.session.remove()

    # Register Blueprints
    from .main import main as main_blueprint
    from .user import user as user_blueprint
    from .note import note as note_blueprint
    app.register_blueprint(main_blueprint)
    app.register_blueprint(user_blueprint)
    app.register_blueprint(note_blueprint)

    return app
class Vegenere:
    def __init__(self):
        self.helper = Helper()

    def encrypt(self, plaintext, key):
        plaintext = self.helper.format(plaintext)
        key = self.helper.format(key)
        kryptotext = ''
        for index, char in enumerate(plaintext):
            plain_char = ord(char) - 65
            key_char = ord(key[index % len(key)]) - 65
            krypto_char = ((plain_char + key_char) % 26) + 65
            kryptotext += unichr(krypto_char)
        return kryptotext

    def decrypt(self, kryptotext, key):
        kryptotext = self.helper.format(kryptotext)
        key = self.helper.format(key)
        plaintext = ''
        for index, char in enumerate(kryptotext):
            krypto_char = ord(char) - 65
            key_char = ord(key[index % len(key)]) - 65
            plain_char = (krypto_char - key_char) % 26
            if plain_char < 0:
                plain_char += 26
            plain_char += 65
            plaintext += unichr(plain_char)
        return plaintext
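# A minimal, self-contained sketch of the same shift arithmetic as the class
# above, assuming the input is already uppercase A-Z (the Helper.format step is
# omitted here). It is an illustration only, not part of the original class: it
# shows that encryption and decryption with the same key invert each other.
def vigenere_encrypt(plaintext, key):
    out = []
    for i, ch in enumerate(plaintext):
        shift = ord(key[i % len(key)]) - 65
        out.append(chr((ord(ch) - 65 + shift) % 26 + 65))
    return ''.join(out)


def vigenere_decrypt(ciphertext, key):
    out = []
    for i, ch in enumerate(ciphertext):
        shift = ord(key[i % len(key)]) - 65
        out.append(chr((ord(ch) - 65 - shift) % 26 + 65))
    return ''.join(out)


# Round trip: decrypting the ciphertext with the same key returns the plaintext.
assert vigenere_decrypt(vigenere_encrypt("ATTACKATDAWN", "LEMON"), "LEMON") == "ATTACKATDAWN"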
def snapshot(dataset, name):
    """ Takes a snapshot """
    command = "zfs snapshot {0}@{1}".format(dataset, name)
    Helper.run_command(command, "/")
def hold(target, snapshot, endpoint=''):
    if endpoint == '':
        command = 'zfs hold zsm {0}@{1}'.format(target, snapshot)
        Helper.run_command(command, '/')
    else:
        command = '{0} \'zfs hold zsm {1}@{2}\''.format(endpoint, target, snapshot)
        Helper.run_command(command, '/')
def __init__(self, host, user, pwd):
    Helper.__init__(self, host, user, pwd)
    reload(sys)
    # set timeout and encoding.
    sys.setdefaultencoding('utf-8')
    socket.setdefaulttimeout(timeout)
def destroy(dataset, snapshot):
    """ Destroys a dataset snapshot """
    command = 'zfs destroy {0}@{1}'.format(dataset, snapshot)
    Helper.run_command(command, '/')
def release(target, snapshot, endpoint=''):
    if endpoint == '':
        command = 'zfs release zsm {0}@{1} || true'.format(target, snapshot)
        Helper.run_command(command, '/')
    else:
        command = '{0} \'zfs release zsm {1}@{2} || true\''.format(endpoint, target, snapshot)
        Helper.run_command(command, '/')
def snapshot(dataset, name):
    """ Takes a snapshot """
    command = 'zfs snapshot {0}@{1}'.format(dataset, name)
    Helper.run_command(command, '/')
def given(self, step):
    '''I have prospects to move'''
    browser = Helper().login()
    self.browser = browser
    print "creating"
    Helper().add_prospects()
    browser.execute_script("location.reload()")
def move(self):
    '''Predator is pretty dumb: if it sees a pray, it follows it to
    autodestruction.
    '''
    # We can always index into the filtered instances, since there should
    # always be a pray left; otherwise the game should have finished (bug?).
    pray = filter(lambda x: isinstance(x, Pray), self.game.instances)[0]

    # In case it sees the pray, change direction to follow it.
    if BaseObject.object_sees_object(self, pray):
        self.direction = Helper.get_direction_towards(self.coord, pray.coord)

    # Try to find close predators and sync with them so that they all arrive
    # at the prey at the same time.
    #
    # The sync works the following way:
    #   - each predator finds the max speed and max distance to the pray
    #     (among all predators visible to it that also follow the pray)
    #   - it updates its speed to v' = v_max * d / d_max,
    #     where d = its own distance to the pray
    max_speed = self.speed
    own_dist = max_dist = Helper.euclidian_distance(self.coord, pray.coord)
    for instance in self.game.instances:
        if instance == self or not isinstance(instance, Predator):
            continue
        # Look only at visible predators other than myself.
        if BaseObject.object_sees_object(self, instance):
            dist = Helper.euclidian_distance(instance.coord, pray.coord)
            max_dist = max(max_dist, dist)
            max_speed = max(max_speed, instance.speed)

    # Sync speed with the other predators.
    self.speed = max_speed * own_dist / float(max_dist)
    super(Predator, self).move()
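# Hedged illustration (not part of the original code) of the speed-sync rule
# above, v' = v_max * d / d_max: after the update every predator has the same
# time-to-prey (distance / speed), so they all arrive together.
max_speed, max_dist = 4.0, 20.0
distances = [20.0, 10.0, 5.0]
speeds = [max_speed * d / max_dist for d in distances]   # -> [4.0, 2.0, 1.0]
times = [d / v for d, v in zip(distances, speeds)]       # -> [5.0, 5.0, 5.0]
assert all(abs(t - times[0]) < 1e-9 for t in times)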
def kmeans_centroids(self, clusters):
    '''Calculate centroids based on clusters by taking the mean of all states
    within the same cluster.
    '''
    Helper.verbose_print('Centroids: ' + str(self.get_centroids_count(clusters)))
    new_centroids = [0 for _ in range(self.k)]
    for i in range(self.k):
        state_sum = tuple([(0, 0) for _ in range(self.coord_nr)])
        nr = 0.0
        for state, cluster in clusters.iteritems():
            if cluster == i:
                nr += 1
                state_sum = self.add_states(state_sum, state)
        # At least one representative for a cluster should exist.
        if nr > 0:
            # Divide every coordinate by the number of members in the cluster.
            new_centroids[i] = map(lambda coord: self.__divide_coord(coord, nr), state_sum)
        # Treat the case of a cluster with no members by making its centroid
        # infinity everywhere.
        else:
            new_centroids[i] = map(lambda coord: tuple([Preprocess.INFINITY for c in coord]), state_sum)
    return new_centroids
def main(endings, size_limit, search_path):
    # initialize function-level variables
    movie_list = []
    longest_title = 0

    # initialize options & arguments from the CLI
    movie_endings = tuple(endings.split(", "))
    movie_size_limit = int(size_limit) * 1024 * 1024  # megabytes

    # initialize the needed objects
    scanner = Scan(movie_endings, movie_size_limit)
    helper = Helper()

    # look for all available files inside the directory, recursively
    for root, subs, files in os.walk(search_path):
        # do any of the available files match a movie file?
        for file in files:
            # is it a movie file?
            bool_movie = scanner.is_movie(file)
            if not bool_movie:
                continue

            # is it large enough?
            movie_path = os.path.join(root, file)
            movie_folder = os.path.basename(root)
            bool_large = scanner.is_large(movie_path)
            if not bool_large:
                continue

            # it is a movie file and large enough, so try to extract a valid movie name
            extracted_data = scanner.extract_file_data(file, movie_folder)

            # if the movie has valid data, create a new movie object
            if -1 in extracted_data:
                print("Problem with: " + extracted_data[0] + " " + str(extracted_data[1]))
            else:
                # data is valid, create an object and append it
                movie_object = Movie(extracted_data[0], extracted_data[1], movie_path, root)
                movie_list.append(movie_object)

                # does the current movie have the longest title?
                if longest_title < len(movie_object.title):
                    longest_title = len(movie_object.title)

    result_str = "Movies counted: {number}".format(number=len(movie_list))
    print(result_str)

    # try to fetch the IMDb rating for each movie object
    for movie in movie_list:
        movie.fetch_rating()
        # is the current movie in the top 250?
        movie.imdb_top = helper.is_imdb_top(movie)

    # sort movies by their rating and print them
    print("")
    movie_list.sort(key=lambda x: x.rating, reverse=True)
    for movie in movie_list:
        movie.print_data(longest_title)
def compute_matrix_block(self, start_row, start_column, num_rows, num_columns):
    """
    Computes a given block of the result matrix. The method invoked by FEP nodes.

    @param start_row: the index of the first row in the block
    @param start_column: the index of the first column in the block
    @param num_rows: number of rows in the block
    @param num_columns: number of columns in the block

    @return: the block of the result matrix encoded as a row-order list of
        lists of integers
    """
    # This method gathers the elements this node needs in order to compute its
    # block. For each element it first finds the node the element should be
    # taken from, starts a Helper thread that obtains the element, and then
    # puts that element into a local matrix. This is done twice, once for each
    # input matrix. After building the two matrices, 'multiply' computes and
    # returns the result.
    A = [[0 for i in range(self.matrix_size)] for j in range(num_rows)]
    B = [[0 for j in range(num_columns)] for j in range(self.matrix_size)]

    for i in range(num_rows):
        for j in range(self.matrix_size):
            row = start_row + i
            id_row = row / self.block_size
            id_column = j / self.block_size
            node = self.nodes[(self.matrix_size / self.block_size) * id_row + id_column]
            i_a = node.node_ID[0]
            j_a = node.node_ID[1]
            size = node.block_size
            helper = Helper(node, row - i_a * size, j - j_a * size, "a")
            helper.start()
            helper.join()
            A[i][j] = helper.element

    for i in range(self.matrix_size):
        for j in range(num_columns):
            column = start_column + j
            id_row = i / self.block_size
            id_column = column / self.block_size
            node = self.nodes[(self.matrix_size / self.block_size) * id_row + id_column]
            i_b = node.node_ID[0]
            j_b = node.node_ID[1]
            size = node.block_size
            helper = Helper(node, i - i_b * size, column - j_b * size, "b")
            helper.start()
            helper.join()
            B[i][j] = helper.element

    return self.multiply(A, B, num_rows, num_columns)
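# Worked example (illustrative only) of the node lookup used above: with
# matrix_size = 4 and block_size = 2, element (row, col) = (3, 1) lives on node
# (matrix_size // block_size) * (row // block_size) + (col // block_size) = 2 * 1 + 0 = 2.
matrix_size, block_size = 4, 2
row, col = 3, 1
node_index = (matrix_size // block_size) * (row // block_size) + (col // block_size)
assert node_index == 2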
def calculate_data(tickets, member, daterange):
    from helper import Helper
    backlogs = Helper.calculate_backlog(tickets, member, daterange, "due_assign")
    burndown = Helper.calculate_burndown_estimated(backlogs)
    csv = [member, burndown["Start"]]
    for d in sorted(daterange):
        csv.append(burndown[d])
    return csv
def test_get_direction_towards(self):
    direction = Helper.get_direction_towards((0, 0), (0, 10))
    self.assertEqual(90, direction)

    direction = Helper.get_direction_towards((0, 0), (1, 1))
    self.assertEqual(45, direction)

    direction = Helper.get_direction_towards((0, 0), (-1, -1))
    eps = math.sin(math.radians(225)) - math.sin(math.radians(direction))
    self.assertTrue(eps < 0.001)
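# A plausible sketch of Helper.get_direction_towards that is consistent with the
# assertions in the test above (angle in degrees from source towards target);
# the real implementation may differ.
import math

def get_direction_towards(src, dst):
    return math.degrees(math.atan2(dst[1] - src[1], dst[0] - src[0])) % 360

assert abs(get_direction_towards((0, 0), (0, 10)) - 90) < 1e-9
assert abs(get_direction_towards((0, 0), (1, 1)) - 45) < 1e-9
assert abs(get_direction_towards((0, 0), (-1, -1)) - 225) < 1e-9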
def calculate_data_actual(tickets, member, daterange, total_point):
    from helper import Helper
    backlogs = Helper.calculate_backlog(tickets, member, daterange, "closed")
    burndown = Helper.calculate_burndown_actual(backlogs, total_point)
    csv = [member, burndown["Start"]]
    for d in sorted(daterange):
        csv.append(burndown[d])
    return csv
def main(miniondata):
    try:
        minion_id = str(miniondata.get('id', 'nobody'))
        h = Helper()
        region = h.get_region()
        mm = MinionManager(region)
        expected_minion_id_list = mm.get_launched()
        if minion_id in expected_minion_id_list:
            accept_minion_key(minion_id)
    except:
        raise
def __find_best_state(self, state, states):
    '''Find the best state out of the states array to match the given state.
    '''
    best_d = Helper.euclidian_distance(state, states[0])
    best_state = states[0]
    for state2 in states[1:]:
        d = Helper.euclidian_distance(state, state2)
        if best_d > d:
            best_d = d
            best_state = state2
    return best_d, best_state
class Main:
    def __init__(self):
        self.dateOfTrade = None
        self.mockPredictionObject = None
        self.mockTradingObject = None
        self.helperObject = Helper()

    def getDate(self):
        dateEntered = raw_input('\nEnter date (dd-mm-yyyy) : ')
        dateList = dateEntered.split('-')
        dateList = [int(dateElement) for dateElement in dateList]
        self.dateOfTrade = datetime.date(dateList[2], dateList[1], dateList[0])

    def main(self):
        print '\n1. Prediction \n2. Trading \n'
        choice = raw_input('Enter your choice : ')
        self.getDate()
        if choice == '1':
            self.helperObject.cleanBeforeTrade()
            self.mockPredictionObject = PredictionModel(self.dateOfTrade)
            self.mockPredictionObject.preConfigureModel()
            goToTrade = raw_input('\n..Go to trading? (y/n): ')
            if goToTrade == 'y':
                runAllAtOnce = raw_input('\n..Run all at once? (y/n): ')
                self.mockPredictionObject.tradeRun(runAllAtOnce)
            else:
                print 'Process aborted by user ..'
        elif choice == '2':
            self.mockTradingObject = TradingEngine(self.dateOfTrade,
                                                   parameters.amountToBeInvested,
                                                   parameters.keepTime)
            self.mockTradingObject.getStockValuesForDay()
            self.mockTradingObject.runTrade('newMethod')
            print 'Return Value for trade :', self.mockTradingObject.returnValue
        else:
            pass
def test_getList__when__an_entry_is_added__should__return_a_list_with(self):
    # Arrange
    entry = Helper.getTestEntry()
    phonebook = Phonebook(Helper.filename)
    phonebook.records = MagicMock(return_value=[entry])
    program = Program(Helper.filename)

    # Act
    entries = program.getList()

    # Assert
    Helper.assertPhonebookContainsEntry(phonebook, entry)
def topics_users(page=0):
    topic_id = request.query.get('topic_id')
    if topic_id:
        topic_id = int(topic_id)
        rtn = Helper.generate_listpage_resource(TopicUser, page, 'name',
                                                '/topics/users', topic_id=topic_id)
    else:
        rtn = Helper.generate_listpage_resource(TopicUser, page, 'name', '/topics/users')
    return dict(users=rtn['resource'], pagination=rtn['pagination'])
def bigvs_followers(page=0):
    bigv_id = request.query.get('bigv_id')
    if bigv_id:
        bigv_id = int(bigv_id)
        rtn = Helper.generate_listpage_resource(BigVFollower, page, 'name',
                                                '/bigvs/followers', bigv_id=bigv_id)
    else:
        rtn = Helper.generate_listpage_resource(BigVFollower, page, 'name', '/bigvs/followers')
    return dict(followers=rtn['resource'], pagination=rtn['pagination'])
def topics_weibos(page=0):
    user_id = request.query.get('user_id')
    if user_id:
        user_id = int(user_id)
        rtn = Helper.generate_listpage_resource(TopicWeibo, page, 'content',
                                                '/topics/weibos', user_id=user_id)
    else:
        rtn = Helper.generate_listpage_resource(TopicWeibo, page, 'content', '/topics/weibos')
    return dict(weibos=rtn['resource'], pagination=rtn['pagination'])
def reposts_users(page=0):
    repost_id = request.query.get('repost_id')
    if repost_id:
        repost_id = int(repost_id)
        rtn = Helper.generate_listpage_resource(RepostUser, page, 'name',
                                                '/reposts/users', repost_id=repost_id)
    else:
        rtn = Helper.generate_listpage_resource(RepostUser, page, 'name', '/reposts/users')
    return dict(users=rtn['resource'], pagination=rtn['pagination'])
def __init__(self, skip_existing_files=True, sleep_timer=10):
    self.Helper = Helper()
    self.url = "https://www.gutenberg.org/files/"
    self.skip_existing_files = skip_existing_files
    self.sleep_timer = sleep_timer
    self.ignored_books = [69]
def calculate_user_top_5(user, all_apps, user_download_list):
    # The approach is to calculate the similarity of every app in all_apps
    # with the user's download list.
    app_similarity = {}
    for app in all_apps:
        if app in user_download_list:
            continue
        else:
            # calculate the app's similarity with the user's download list
            similarity = Helper.cosine_similarity([app], user_download_list)
            if app_similarity.has_key(app):
                app_similarity[app] = app_similarity[app] + similarity
            else:
                app_similarity[app] = similarity

    # get the top 5
    top_five_apps = []
    sorted_apps = sorted(app_similarity.items(), key=operator.itemgetter(1), reverse=True)
    i = 0
    while i < 5:
        # print("top 5 apps " + sorted_apps[i][0])  # uncomment to test one app's top 5
        top_five_apps.append(sorted_apps[i][0])
        i += 1

    DataService.update_user_info({'user_id': user}, {'$set': {"top_5_app": top_five_apps}})
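# Hypothetical sketch of a set-based cosine similarity such as
# Helper.cosine_similarity might compute between an app and a download list
# (|A intersect B| / sqrt(|A| * |B|)); the real Helper may weight items differently.
import math

def cosine_similarity(list_a, list_b):
    a, b = set(list_a), set(list_b)
    if not a or not b:
        return 0.0
    return len(a & b) / math.sqrt(len(a) * len(b))

# e.g. a single app against a user's download list:
assert abs(cosine_similarity(['maps'], ['maps', 'mail', 'chat', 'music']) - 0.5) < 1e-9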
def setUp(self):
    self.config = Helper.get_config()
    self.preprocess = Preprocess(self.config)
    self.game = Game(self.config, self.preprocess)
    self.kmeans = KMeans(self.config, self.game)
    self.pred_nr = self.config.getint('game', 'pred_nr')
    self.trap_nr = self.config.getint('game', 'trap_nr')
def test_calculate_data_multiticket(self):
    start = datetime.strptime('2015/10/1', '%Y/%m/%d')
    end = datetime.strptime('2015/10/3', '%Y/%m/%d')
    daterange = ['2015/10/01', '2015/10/02']
    member = 'admin'
    tickets = [
        {
            'reporter': 'admin',
            'due_assign': '2015/10/01',
            'point': '10'
        },
        {
            'reporter': 'admin',
            'due_assign': '2015/10/02',
            'point': '5'
        },
        {
            'reporter': 'admin',
            'due_assign': '2015/10/03',  # out of the date range
            'point': '5'
        },
        {
            'reporter': 'guest',  # assigned to someone else
            'due_assign': '2015/10/01',
            'point': '8'
        }]
    backlog = Helper.calculate_data(tickets, member, daterange)
    assert backlog == ['admin', '15', '5', '0']
def calculate_app_top_10(app, user_download_history):
    # compute the given app's similarity with all the download lists
    app_similarity = {}
    for apps in user_download_history:
        similarity = Helper.cosine_similarity([app], apps)
        for listapp in apps:
            if listapp is app:
                continue
            elif app_similarity.has_key(listapp):
                app_similarity[listapp] = app_similarity[listapp] + similarity
            else:
                app_similarity[listapp] = similarity

    if not app_similarity.has_key(app):
        return

    # remove itself
    # app_similarity.pop(app)

    # calculate the top 10; reverse=True means from bigger to smaller,
    # items() plus operator.itemgetter(1) sorts the (key, value) pairs by value
    sorted_apps = sorted(app_similarity.items(), key=operator.itemgetter(1), reverse=True)
    top_ten_apps = []
    i = 0
    while i < 10:
        # print("top 10 apps " + sorted_apps[i][0])  # uncomment to test one app's top 10
        top_ten_apps.append(sorted_apps[i][0])
        i += 1
def test_valid_create(self):
    ws = websocket.create_connection(Helper.really_server)
    init_request = {
        "tag": 123,
        "traceId": "@trace123",
        "cmd": "initialize",
        "accessToken": Helper.get_anonymous_token()
    }
    ws.send(json.dumps(init_request))
    init_data = ws.recv()
    init_result = json.loads(init_data)
    self.assertEqual(init_result["evt"], "initialized")

    ws.send("""{
        "tag" : 123,
        "traceId" : "@trace123",
        "cmd" : "create",
        "r" : "/users/",
        "body" : {
            "firstName": "Ahmed",
            "age": 18
        }
    }""")
    create_data = ws.recv()
    create_result = json.loads(create_data)
    self.assertEqual(create_result["body"]["firstName"], "Ahmed")
def crawler_news(self): """ This function will scrap news page wise for given url :return: """ try: loop = True page = 0 while loop: response = crawler.MakeRequest(self.url.format(page=page), 'Get', postData=self.body, headers=self.headers) soup = BeautifulSoup(response.content, 'html.parser') bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) news_data = soup.find('tbody') if news_data: for news in news_data.find_all('tr'): try: news_dict = Helper.get_news_dict() title_data = news.find( 'td', { 'class': 'views-field views-field-field-nir-news-title' }).find('a', {'href': True}) title = title_data.text.strip( ) if title_data else "" url_data = news.find( 'td', { 'class': 'views-field views-field-field-nir-news-title' }).find('a', {'href': True}) url = "https://investor.harvardbioscience.com" + str( url_data['href']) if url_data else '' # Check if already present unqUrl = hashlib.md5(url.encode()).hexdigest() chkIsExists = DbOperations.GetData( self.news_collection, {"news_url_uid": str(unqUrl)}, {}, QueryType.one) if (chkIsExists): print("Already saved. url - ( " + url + " )") continue publish_date_data = news.find( 'time', {'class': 'datetime'}) publish_date = Helper.parse_date( publish_date_data.text ) if publish_date_data else '' url_response = crawler.MakeRequest( url, 'Get', postData=self.body, headers=self.headers) url_soup = BeautifulSoup(url_response.content, 'html.parser') description_data = url_soup.find( 'div', {'class': 'node__content'}) description = [] regex = re.compile(r'[\n\xa0]') for desc in description_data.find_all('p'): description.append( regex.sub("", str(desc.text))) description = ''.join(description) news_dict.update({ "title": title, "news_title_uid": hashlib.md5(title.encode()).hexdigest(), "url": url, "link": url, "news_url_uid": hashlib.md5(url.encode()).hexdigest(), "description": description, "text": description, "publishedAt": publish_date, 'date': publish_date, "publishedAt_scrapped": publish_date, "company_id": "harvardbioscience", "ticker": "harvardbioscience_scrapped", "industry_name": "harvardbioscience", "news_provider": "harvardbioscience" }) bulk_obj.insert(news_dict) if len(bulk_obj._BulkOperationBuilder__bulk. __dict__['ops']) > 100: bulk_obj.execute() bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) except Exception as e: self.logger.error(f"Error Occured : \n", exc_info=True) if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 0: bulk_obj.execute() page += 1 else: print("All news has been scrapped !!") loop = False except Exception as e: self.logger.error(f"Error Occured : \n", exc_info=True)
import os.path, sys
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
import xml.etree.ElementTree as ET
from helper import Helper

if __name__ == '__main__':
    module_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    viettreebank_path = module_path + '/data/viettreebank.xml'
    tree = ET.parse(viettreebank_path)
    root = tree.getroot()
    c_l_e_labels = set()
    labels = ['C', 'L', 'E', 'P', 'Nc', 'R']
    for label in labels:
        for c_label in root.findall('.//' + label):
            c_text = c_label.text.lower()
            if c_text not in c_l_e_labels and len(c_text.split()) == 1:
                c_l_e_labels.add(c_text)
                print c_text
    label = [u'còn', u'rất', u'cũng', u'đã']
    for p_label in label:
        if p_label not in c_l_e_labels:
            c_l_e_labels.add(p_label)
    result_data_path = module_path + '/data/c_e_l_viettreebank.pkl'
    Helper.save_obj(c_l_e_labels, result_data_path)
    print 'Save done!'
def update_hm(xaxis_column_name, yaxis_column_name, zaxis_column_name, cik): df = get_data(cik) # Create new data subset and assign axes subdata = pd.DataFrame({ 'x': df[xaxis_column_name], 'y': df[yaxis_column_name], }) if zaxis_column_name in countable_metrics: subdata['z'] = subdata.index elif zaxis_column_name in delinquency_metrics: subdata['z'] = df[delinquency_metrics[zaxis_column_name]] else: subdata['z'] = df[zaxis_column_name] # Filter out nans subdata = subdata[subdata.x.notnull() & subdata.y.notnull()] # Cut axes into bins if numeric grouped = {'x': False, 'y': False} if subdata.x.dtype in numeric_types: subdata.x = pd.cut(subdata.x, 20, include_lowest=True) grouped['x'] = True if subdata.y.dtype in numeric_types: subdata.y = pd.cut(subdata.y, 20, include_lowest=True) grouped['y'] = True # Modify z-axis type because pivot table does not work with float32 if subdata.z.dtype in ['float16', 'float32']: subdata.z = subdata.z.astype('float') # Build pivot table if zaxis_column_name in countable_metrics: pt = pd.pivot_table(subdata, index='y', columns='x', values='z', aggfunc='count', fill_value=0) elif zaxis_column_name in delinquency_metrics: subdata['ones'] = 1 pt1 = pd.pivot_table(subdata, index='y', columns='x', values='z', aggfunc='count', fill_value=0) pt2 = pd.pivot_table(subdata, index='y', columns='x', values='ones', aggfunc='count', fill_value=0) pt1.sort_index(axis=0, inplace=True) pt1.sort_index(axis=1, inplace=True) pt2.sort_index(axis=0, inplace=True) pt2.sort_index(axis=1, inplace=True) pt = pt1 / pt2 pt.sort_index(axis=0, inplace=True) pt.sort_index(axis=1, inplace=True) pt.fillna(0, inplace=True) else: pt = pd.pivot_table(subdata, index='y', columns='x', values='z', aggfunc='mean', fill_value=0) pt.sort_index(axis=0, inplace=True) pt.sort_index(axis=1, inplace=True) x_labels = pt.columns y_labels = pt.index if grouped['x']: x_labels = np.array([str((i.left + i.right) / 2) for i in pt.columns]) if grouped['y']: y_labels = np.array([str((i.left + i.right) / 2) for i in pt.index]) trace = go.Heatmap( x=x_labels, y=y_labels, z=pt.values.tolist(), colorscale='Greens', reversescale=True, colorbar=dict( tickformat=Helper.get_format(zaxis_column_name, precision=1)), ) layout = go.Layout( font=dict(family=font_family, size=14), autosize=True, height=550, margin={ 't': 50, 'l': 100 }, ) return dict(data=[trace], layout=layout)
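# Minimal toy illustration (made-up data, not the app's loan data) of the
# bin-then-pivot pattern used above: numeric axes are cut into bins with pd.cut
# and the z-values are aggregated with pd.pivot_table.
import numpy as np
import pandas as pd

toy = pd.DataFrame({'x': np.random.rand(100) * 10,
                    'y': np.random.rand(100) * 5,
                    'z': np.random.rand(100)})
toy['xbin'] = pd.cut(toy.x, 4, include_lowest=True)
toy['ybin'] = pd.cut(toy.y, 4, include_lowest=True)
pt = pd.pivot_table(toy, index='ybin', columns='xbin', values='z',
                    aggfunc='mean', fill_value=0)
print(pt.shape)  # (4, 4)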
#!/usr/bin/env python3
from bottle import Bottle, route, run, template, static_file
from helper import Helper
import os, sys

dirname = os.path.dirname(sys.argv[0])
myHelper = Helper()
app = Bottle()


@app.route('/')
def index():
    return template('index', chapters=myHelper.Content)


@app.route('/static/css/<filename:re:.*\.css>')
def send_css(filename):
    return static_file(filename, root=dirname + '/static/css')


@app.route('/static/js/<filename:re:.*\.js>')
def send_js(filename):
    return static_file(filename, root=dirname + '/static/js')


@app.route('/static/img/<chapter:re:[1-9]\d*>/<page:re:[1-9]\d*\.jpg>')
def send_img(chapter, page):
    return static_file(str(page), root=dirname + '/static/img/' + str(chapter))
def crawler_news(self): response = crawler.MakeRequest(self.url, 'Get', postData=self.body, headers=self.headers) soup = BeautifulSoup(response.content, 'html.parser') bulk_obj = DbOperations.Get_object_for_bulkop(False, 'mitsui_news') news_data = soup.find_all('dl', {'class': "module_list-update__list-dl"}) if news_data: for news in news_data: news_dict = Helper.get_news_dict() title_data = news.find('dd') title = title_data.text if title_data else "" url_data = news.find('a', {'href': True}) url = "https://www.mitsui.com" + str( url_data['href']) if url_data else '' publish_date_data = news.find('dt') if publish_date_data.span != None: publish_date_data.span.decompose() publish_date = Helper.parse_date( publish_date_data.text ) if publish_date_data and publish_date_data.text != '' else '' url_response = crawler.MakeRequest(url, 'Get', postData=self.body, headers=self.headers) url_soup = BeautifulSoup(url_response.content, 'html.parser') description_data = url_soup.find( 'section', {'class': "global_main-contents__section"}) description = [] regex = re.compile(r'[\n\xa0]') for desc in description_data.find_all('p'): description.append(regex.sub("", str(desc.text.strip()))) description = ''.join(description) news_dict.update({ "title": title, "news_title_uid": hashlib.md5(title.encode()).hexdigest(), "url": url, "link": url, "news_url_uid": hashlib.md5(url.encode()).hexdigest(), "description": description, "text": description, "publishedAt": publish_date, 'date': publish_date, "publishedAt_scrapped": publish_date, "company_id": "mitsui", "ticker": "mitsui_scrapped", "industry_name": "mitsui", "news_provider": "mitsui" }) bulk_obj.insert(news_dict) if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 100: bulk_obj.execute() bulk_obj = DbOperations.Get_object_for_bulkop( False, 'mitsui_news') if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops']) > 0: bulk_obj.execute() else: print("News Not Found")
def __init__(self, url, body=None, headers=None, logger=None):
    self.url = url
    self.body = body
    self.headers = headers
    self.logger = logger
    self.news_collection = Helper.getNewsCollection()
def crawler_news(self): try: loop = True page = 1 while loop: try: response = crawler.MakeRequest(self.url.format(page=page), 'Get', postData=self.body, headers=self.headers) soup = BeautifulSoup(response.content, 'html.parser') bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) news_data = soup.find_all('div', {'class': "media"}) if news_data: for news in news_data: news_dict = Helper.get_news_dict() title_data = news.find('h4', {'class': 'media-heading'}) title = title_data.text if title_data else "" url_data = news.find('a', {'href': True}) url = "https://www.hbfuller.com" + str( url_data['href']) if url_data else '' # Check if already present unqUrl = hashlib.md5(url.encode()).hexdigest() chkIsExists = DbOperations.GetData( self.news_collection, {"news_url_uid": str(unqUrl)}, {}, QueryType.one) if (chkIsExists): print("Already saved. url - ( " + url + " )") continue publish_date_data = news.find( 'div', {'class': 'listing-date'}) publish_date = Helper.parse_date( str(publish_date_data.text).strip() ) if publish_date_data and publish_date_data.text != '' else '' url_response = crawler.MakeRequest( url, 'Get', postData=self.body, headers=self.headers) url_soup = BeautifulSoup(url_response.content, 'html.parser') description_data = url_soup.find( 'div', { 'class': 'row ar-body' }).find('div', { 'class': "col-xs-12 col-sm-8 col-md-9" }).find('div', { 'class': 'col-sm-12' }).find('div', {'style': ''}) description = description_data.text.strip().split( '\n') description = ''.join(description[1:]) news_dict.update({ "title": title, "news_title_uid": hashlib.md5(title.encode()).hexdigest(), "url": url, "link": url, "news_url_uid": hashlib.md5(url.encode()).hexdigest(), "description": description, "text": description, "publishedAt": publish_date, 'date': publish_date, "publishedAt_scrapped": publish_date, "company_id": "hbfuller", "ticker": "hbfuller_scrapped", "industry_name": "hbfuller", "news_provider": "hbfuller" }) bulk_obj.insert(news_dict) if len(bulk_obj._BulkOperationBuilder__bulk. __dict__['ops']) > 100: bulk_obj.execute() bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) if len(bulk_obj._BulkOperationBuilder__bulk. __dict__['ops']) > 0: bulk_obj.execute() page += 1 else: print("All news has been scrapped !!") loop = False except AttributeError as e: print("All news has been scrapped !!") loop = False except Exception as e: self.logger.error(f"Error Occured : \n", exc_info=True)
                    page += 1
                else:
                    print("All news has been scraped !!")
                    loop = False
            except AttributeError as e:
                print("All news has been scraped !!")
                loop = False
        except Exception as e:
            self.logger.error("Error occurred : \n", exc_info=True)


# Create and configure logger
logging.basicConfig(filename="news_scraping_logs.log",
                    format='%(asctime)s %(message)s',
                    filemode='a')
logger = logging.getLogger()

url = "https://www.hbfuller.com/en/north-america/news-and-events/press-releases?page={page}"
news_obj = hbfuller(url, logger=logger)
news_obj.crawler_news()

news_collection = Helper.getNewsCollection()
processed_collection = Helper.getProcessNewsCollection()
news_log_collection = Helper.getLogCollection()

isInserted, rowCount = Helper.processNews(news_collection, processed_collection, 'hbfuller')
print('Total rows added to the processed collection => ' + str(rowCount))

# UPDATING LOG COLLECTION
if (isInserted):
    Helper.makeLog(news_log_collection, processed_collection, 'hbfuller')
parser.add_argument('--params', dest='params', default='utils/params.yaml')
parser.add_argument('--name', dest='name', required=True)
parser.add_argument('--commit', dest='commit', default=get_current_git_hash())

args = parser.parse_args()

with open(args.params) as f:
    params = yaml.load(f, Loader=yaml.FullLoader)

params['current_time'] = datetime.now().strftime('%b.%d_%H.%M.%S')
params['commit'] = args.commit
params['name'] = args.name

helper = Helper(params)
logger.warning(create_table(params))

try:
    if helper.params.fl:
        fl_run(helper)
    else:
        run(helper)
except (KeyboardInterrupt, RuntimeError):
    if helper.params.log:
        answer = prompt('\nDelete the repo? (y/n): ')
        if answer in ['Y', 'y', 'yes']:
            logger.error(f"Fine. Deleted: {helper.params.folder_path}")
            shutil.rmtree(helper.params.folder_path)
            if helper.params.tb:
                shutil.rmtree(f'runs/{args.name}')
def __init__(self, conf):
    Field.__init__(self, conf)
    for fieldConf in conf['fields']:
        field = Helper.createField(fieldConf['type'], fieldConf)
        self.fields.append(field)
def __init__(self):
    self.featuresObject = GetFeatures('corpora')
    self.helperObject = Helper()
    self.svmObject = None
print ",--. ,--. ,---. ,------.,------. ,------. ,------. ,-----. ,------." print "| | | |' .-' | .---'| .--. ' | .--. '| .--. '' .-. '| .---'" print "| | | |`. `-. | `--, | '--'.' | '--' || '--'.'| | | || `--," print "' '-' '.-' || `---.| |\ \ | | --' | |\ \ ' '-' '| |`" print " `-----' `-----' `------'`--' '--' `--' `--' '--' `-----' `--'" #### Init global vars #### initTime = str('%02d' % datetime.datetime.now().hour) + ':' + str( '%02d' % datetime.datetime.now().minute) + ':' + str( '%02d' % datetime.datetime.now().second) startTime = datetime.datetime.now() converted, activeWorkers = 0, 0 #### Init helper object #### helperObj = Helper() #### Init options #### options, args = helperObj.setupParser() #### Init DB #### InputMongoDB = MongoClient().FormattedLogs[options.inputMongo] helperObj.OutputMongoDB = MongoClient().profile_user['profile_user_' + initTime] helperObj.BotMongoDB = MongoClient().config_static.profile_bots #### Get list of admin strings #### AdminMongoList = [] for admin in MongoClient().config_static.profile_admin.find(): AdminMongoList.append(admin['name']) helperObj.AdminMongoList = AdminMongoList
class HagneauImplementation: def __init__(self): self.featuresObject = GetFeatures('corpora') self.helperObject = Helper() self.svmObject = None def extractFeatures(self): if isfile('data/all_features.pkl'): remove('data/all_features.pkl') else: pass self.helperObject.corpusFilesLoader(True) extractionTimeStart = time.time() self.featuresObject.extractFeatures(parameters.baseFeatureType) extractionTimeEnd = time.time() print 'Features Extraction Time : %.2f minutes' % ( (extractionTimeEnd - extractionTimeStart) / 60) def selectFeatures(self): selectionTimeStart = time.time() self.featuresObject.selectFeatures(parameters.baseSelectionType, parameters.numberOfFeatures) selectionTimeEnd = time.time() print 'Features Selection Time : %.2f minutes' % ( (selectionTimeEnd - selectionTimeStart) / 60) def getVectors(self): representationTimeStart = time.time() numberOfVectorsFormed = self.featuresObject.representFeatures() representationTimeEnd = time.time() numberForTraining = int(numberOfVectorsFormed * parameters.percentageTrainVectors) print 'Document Vectors representation time : %.2f minutes' % ( (representationTimeEnd - representationTimeStart) / 60) def trainSVM(self): self.svmObject = SVMTrain(True) self.svmObject.prepareSVM() self.svmObject.trainingPhase() def classifyResults(self): self.svmObject = SVMResult(True) self.svmObject.prepareSVM() result = self.svmObject.svmObject.cv(self.svmObject.trainData, numFolds=5) print 'Classification accuracy : ', result.getBalancedSuccessRate() def doTrading(self): self.svmObject = SVMResult(True) self.svmObject.prepareSVM() choice = 'y' while choice != 'n': enteredDate = raw_input('Enter date (dd-mm-yyyy): ') dayList = enteredDate.split('-') dayList = [int(i) for i in dayList] tradingDay = datetime.date(dayList[2], dayList[1], dayList[0]) sentimentList = [] dataSetPath = 'dataset/' + tradingDay.strftime('%d-%h-%G') timeNow = datetime.datetime.combine(tradingDay, datetime.time(9, 15)) endingTime = datetime.datetime.combine(tradingDay, datetime.time(15, 15)) streamTime = 0 currentSentiment = 0.0 totalSuccessRate = 0.0 startTime = time.time() while True: fileNames = listdir('new_news') for fileName in fileNames: remove('new_news/' + fileName) print '\nTime :', timeNow.strftime('%H:%M'), '\n' noNewsFile = False # get the file name which is the current news fileReader = open(dataSetPath + '/log_file.txt', 'r') for lineNumber, line in enumerate(fileReader): if lineNumber == streamTime: newsFileName = line else: pass fileReader.close() # check whether news file is present or not tempValue = newsFileName.split(' ')[:-1] if tempValue[1] != '0': newsFileName = tempValue[0] tree = ET.parse(dataSetPath + '/corpora/' + newsFileName) root = tree.getroot() for sentimentNode in root.iter('sentiment'): if sentimentNode.text == 'neutral': sentimentNode.text = 'positive' else: pass tree.write('new_news/' + newsFileName) #copy(dataSetPath + '/corpora/' + newsFileName, 'new_news/' + newsFileName) else: noNewsFile = True sentiments = [] if noNewsFile == False: matrix, successRate = self.svmObject.getResult( parameters.baseFeatureType) totalSuccessRate += successRate for i in range(2): for j in range(2): if matrix[i][j] > 0: if j == 0: s = -1.0 else: s = 1.0 sentiments.append(s * matrix[i][j]) else: pass currentSentiment = sum(sentiments) / (len(sentiments) * 1.0) else: pass sentimentList.append(currentSentiment) if noNewsFile == False: remove('new_news/' + newsFileName) else: pass streamTime += 1 timeNow += datetime.timedelta(minutes=15) if timeNow >= endingTime: break 
endTime = time.time()
totalSuccessRate /= streamTime
print 'Classification Success Rate : ', (totalSuccessRate * 100)
print 'Total time taken for all classification : %.2f minutes' % (
    (endTime - startTime) / 60)

# trading engine
tradingObject = TradingEngine(tradingDay, parameters.baseInvestAmount,
                              parameters.baseKeepTime)
tradingObject.getStockValuesForDay()
tradingObject.runTrade('base', sentimentList)
print 'Return Value for trade : ', tradingObject.returnValue

choice = raw_input('Enter another date? (y/n) : ')
import json
import collections

import helper_test
from configuration import Configuration
from helper import Helper

debug = True
configuration = Configuration('actioncam', path=helper_test.config_path(), debug=debug)
config_actioncam = configuration.config["actioncam"]
helper = Helper(configuration.config)
helper.state_set_start()


def test_config_default():
    print("test_config_default")
    assert configuration.config['DEFAULT'] != "", "Failed checking Default"
    j = configuration.config['DEFAULT']
    j = collections.OrderedDict(sorted(j.items()))
    print(json.dumps(j, indent=4, sort_keys=True))


def test_config_output_folder():
    print("test_config_output_folder")
    c_camera = configuration.config["camera"]
    type = "motion.avi"
    out = c_camera["recording_location"] + "/" + c_camera[
        'identify'] + "_" + helper.now_str() + "_" + type
def crawler_news(self): """ This function will scrap news page wise for given url :return: """ try: loop = True page = 0 while loop: response = crawler.MakeRequest(self.url.format(page=page), 'Get', postData=self.body, headers=self.headers) soup = BeautifulSoup(response.content, 'html.parser') bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) news_data = soup.find_all( 'div', { 'class': "coh-column zmb-44 coh-visible-xs coh-col-xs-12 coh-visible-md coh-col-md-6 coh-col-md-push-0 coh-col-md-pull-0 coh-col-md-offset-0" }) if news_data: for news in news_data: news_dict = Helper.get_news_dict() title_data = news.find('a') title = title_data.text if title_data else "" url_data = news.find('a', {'href': True}) url = "https://www.zscaler.com/" + str( url_data['href']) if url_data else '' # Check if already present unqUrl = hashlib.md5(url.encode()).hexdigest() chkIsExists = DbOperations.GetData( self.news_collection, {"news_url_uid": str(unqUrl)}, {}, QueryType.one) if (chkIsExists): print("Already saved. url - ( " + url + " )") continue publish_date_data = news.find( 'time', {'class': 'text-center bg-sea-green'}) publish_date = Helper.parse_date( publish_date_data.text ) if publish_date_data and publish_date_data.text != '' else '' url_response = crawler.MakeRequest( url, 'Get', postData=self.body, headers=self.headers) url_soup = BeautifulSoup(url_response.content, 'html.parser') description_data = url_soup.find_all( 'div', {'class': "col-sm-12"}) description = [] regex = re.compile(r'[\n\xa0]') for desc in description_data: if desc.find( 'a', {'href': 'https://www.zscaler.com/'}) != None: description.append( regex.sub("", str(desc.text))) description = ''.join(description) news_dict.update({ "title": title, "news_title_uid": hashlib.md5(title.encode()).hexdigest(), "url": url, "link": url, "news_url_uid": hashlib.md5(url.encode()).hexdigest(), "description": description, "text": description, "publishedAt": publish_date, 'date': publish_date, "publishedAt_scrapped": publish_date, "company_id": "zscaler", "ticker": "zscaler_scrapped", "industry_name": "zscaler", "news_provider": "zscaler" }) bulk_obj.insert(news_dict) if len(bulk_obj._BulkOperationBuilder__bulk. __dict__['ops']) > 100: bulk_obj.execute() bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 0: bulk_obj.execute() page += 1 else: print("All news has been scrapped !!") loop = False except Exception as e: self.logger.error(f"Error Occured : \n", exc_info=True)
def monitor_fab(self, screener, df_metrics, tfs, number_of_trades=3, leverage=1, recency=-1, additional_balance=0, max_trade_size=None): # self.order_history = pd.DataFrame() self.load_account() executor = self.executor self.leverage = leverage essential_metrics = Helper.drop_extra_delays(df_metrics, screener.tf_delay_match) self.starting_capital = self.get_capital( additional_balance=additional_balance) self.max_trade_size = max_trade_size self.key = self.get_key(essential_metrics) # self.key.to_csv ('Optimization Function Key.csv') now = Helper.current_minute_datetime() while True: if datetime.now() >= now + pd.Timedelta(1, 'minute'): clear_output(wait=True) print("Current time: \t", datetime.now(), end='\n\n') current_positions = self.get_current_trade_progress( key=self.key) if len(current_positions) > number_of_trades: raise Exception( f"You have more than {number_of_trades} active trades! Close first" ) if current_positions['usd size'].sum( ) > self.starting_capital * 1.5: raise Exception('We are too overexposed!', current_positions['usd size'].sum()) self.close_any_old_trades(screener, executor, current_positions) remaining_capital = self.calculate_remaining_capital( current_positions, self.capital) ## trades_to_enter is a list of lists (Ex. [('BANDUSDT', 'Short', 'Rule2'), ('BCHUSDT', 'Short', 'Rule 1')] ## df_recent_signals is a regular screener dataframe trades_to_enter, recent_signals = screener.top_trades( trader=self, df_metrics=df_metrics, tfs=tfs, n=number_of_trades, recency=recency) self.trades_to_enter, self.recent_signals = trades_to_enter, recent_signals print("Signals:") display(recent_signals) trades_left = number_of_trades - len(current_positions) if remaining_capital > 100 and trades_left > 0 and trades_to_enter: self.trade_free_capital(executor, current_positions, remaining_capital, trades_to_enter) elif remaining_capital > 100 and len(current_positions) != 0: trades_to_enter = self.update_trades_to_enter( current_positions=current_positions, trades_to_enter=trades_to_enter) self.trade_free_capital(executor, current_positions, remaining_capital, trades_to_enter) elif not trades_to_enter: Helper.sleep(60) now = Helper.current_minute_datetime() continue else: print("Leverage: ", self.leverage) print("Trades to Enter: ", trades_to_enter) print("Max Trades: ", number_of_trades) self.optimize_trades(executor, current_positions, trades_to_enter, number_of_trades) Helper.sleep(60) now = Helper.current_minute_datetime() print() Helper.output_loading()
def crawler_news(self): try: response = crawler.MakeRequest(self.url, 'Get', postData=self.body, headers=self.headers) soup = BeautifulSoup(response.content, 'html.parser') bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) news_data = soup.find('div', {'id': "contents"}) if news_data: for news in news_data.find_all('dt', {'class': 'mgnT15'}): news_dict = Helper.get_news_dict() title_data = news.find_next_sibling().a title = title_data.text if title_data else "" url_data = news.find_next_sibling().a url = "https://www.toray.in/india/news/" + str( url_data['href']) if url_data else '' # Check if already present unqUrl = hashlib.md5(url.encode()).hexdigest() chkIsExists = DbOperations.GetData( self.news_collection, {"news_url_uid": str(unqUrl)}, {}, QueryType.one) if (chkIsExists): print("Already saved. url - ( " + url + " )") continue publish_date = Helper.parse_date( str(news.text).split('\n') [0]) if news and news.text != '' else '' url_response = crawler.MakeRequest(url, 'Get', postData=self.body, headers=self.headers) url_soup = BeautifulSoup(url_response.content, 'html.parser') description_data = url_soup.find_all( 'p', {'class': "mgnB20"}) description = [] regex = re.compile(r'[\n\xa0]') for desc in description_data: description.append(regex.sub("", str(desc.text))) description = ''.join(description) news_dict.update({ "title": title, "news_title_uid": hashlib.md5(title.encode()).hexdigest(), "url": url, "link": url, "news_url_uid": hashlib.md5(url.encode()).hexdigest(), "description": description, "text": description, "publishedAt": publish_date, 'date': publish_date, "publishedAt_scrapped": publish_date, "company_id": "toray", "ticker": "toray_scrapped", "industry_name": "toray", "news_provider": "toray" }) bulk_obj.insert(news_dict) if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 100: bulk_obj.execute() bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 0: bulk_obj.execute() except Exception as e: self.logger.error(f"Error Occured : \n", exc_info=True)
from helper import Helper

s_id = input("Please input your student id: ")
print("Today ==================== 0")
print("Tomorrow ================= 1")
print("The day after tomorrow === 2")
date_offset = input("Please choose the date: ")
s_time = input("Please input start time: ")
e_time = input("Please input end time: ")
print("花津2楼南: 2s")
print("花津3楼南: 3s")
print("花津3楼北: 3n")
print("花津4楼南: 4s")
print("花津4楼北: 4n")
print("3楼公共E: 3g1")
print("3楼公共W: 3g2")
print("4楼公共E: 4g1")
print("4楼公共W: 4g2")
room_code = input("Please choose room: ")
helper = Helper(s_id)
res = helper.run(room_code, date_offset, s_time, e_time)
if res:
    print("Congratulations! :)")
else:
    print("Shit! I failed :(")
def test_basic(self): success = 0 for i in range(100): # total number of clients N = 9 # number of preferences allowed k = 2 # initialize server and clients server = Server(N, k, self.g, self.p) clients = [] for i in range(N): clients.append(Client(i, N, k, self.g, self.p)) # entries are generated and sent to server, who encrypts them entries = [] for i in range(N): entries.append(clients[i].generate_key_ex_part_one()) encrypted_entries = server.receive_key_ex_part_one(entries) # print(encrypted_entries) # server sends encrypted secrets to everyone for i in range(N): clients[i].receive_encrypted_entries(encrypted_entries) # each client does OT with server for k keys for i in range(N): prefs = clients[i].get_preferences() m = [] for p in prefs: row_secs, col_secs, v = server.k_N_OT_one(self.g) row, col = Helper.one_to_two_dimension(p, N) row_val = self.one_N_OT(server, clients[i], row_secs, row) col_val = self.one_N_OT(server, clients[i], col_secs, col) key = self.roundup(int(v**(row_val * col_val))) m.append(encrypted_entries[p] ^ key) clients[i].update_with_entries(m) # each client broadcasts bs = [] for i in range(N): b = clients[i].broadcast() bs.extend(b) # server broadcasts to all clients for i in range(N): clients[i].receive_broadcast(bs) # print out everything succ = 0 for i in range(N): prefs = clients[i].get_preferences() manual_matches = [] for p in prefs: if i in clients[p].get_preferences(): manual_matches.append(p) manual_matches = manual_matches matches = clients[i].get_matches() if (sorted(manual_matches) == sorted(matches)): succ += 1 keys = clients[i].get_completed_keys() # print(i, prefs, keys, matches) if succ == N: success += 1 print("basic successes: " + str(success))
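# Hedged guess at the index mapping Helper.one_to_two_dimension performs in the
# test above: a flat preference index p in [0, N*N) maps to the (row, col) cell
# of an N x N table. The real helper may differ.
def one_to_two_dimension(p, N):
    return p // N, p % N

assert one_to_two_dimension(7, 3) == (2, 1)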
def crawler_news(self): loop = True page = 0 while loop: response = crawler.MakeRequest(self.url.format(page=page), 'Get', postData=self.body, headers=self.headers) soup = BeautifulSoup(response.content, 'html.parser') bulk_obj = DbOperations.Get_object_for_bulkop(False, 'imerys_news') news_data = soup.find_all('div', {'class': "documents-list__item"}) if news_data: for news in news_data: news_dict = Helper.get_news_dict() title_data = news.find('a') title = title_data.text if title_data else "" url_data = news.find('a', {'href': True}) url = url_data['href'] if url_data else '' publish_date_data = news.find( 'div', {'class': 'documents-list__item__date'}) publish_date = Helper.parse_date( publish_date_data.text ) if publish_date_data and publish_date_data.text != '' else '' news_dict.update({ "title": title, "news_title_uid": hashlib.md5(title.encode()).hexdigest(), "url": url, "link": url, "news_url_uid": hashlib.md5(url.encode()).hexdigest(), "publishedAt": publish_date, 'date': publish_date, "publishedAt_scrapped": publish_date, "company_id": "imerys", "ticker": "imerys_scrapped", "industry_name": "imerys", "news_provider": "imerys" }) bulk_obj.insert(news_dict) if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 100: bulk_obj.execute() bulk_obj = DbOperations.Get_object_for_bulkop( False, 'imerys_news') if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 0: bulk_obj.execute() page += 1 else: print("News Not Found") loop = False
def show_live_chart(self, symbol, tf, refresh_rate=1):
    df_tf_sma = self.show_current_chart(symbol, tf)
    while True:
        Helper.sleep(refresh_rate)
        clear_output(wait=True)
        df_tf_sma = self.show_current_chart(symbol, tf, data=df_tf_sma)
def crawler_news(self): try: loop = True page = 0 while loop: response = crawler.MakeRequest(self.url.format(page=page), 'Get', postData=self.body, headers=self.headers) soup = BeautifulSoup(response.content, 'html.parser') bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) news_data = soup.find_all('div', {'class': "views-col"}) if news_data: for news in news_data: news_dict = Helper.get_news_dict() title_data = news.find('h4') title = title_data.text if title_data else "" url_data = news.find('a', {'href': True}) url = "https://www.aruplab.com" + str( url_data['href']) if url_data else '' # Check if already present unqUrl = hashlib.md5(url.encode()).hexdigest() chkIsExists = DbOperations.GetData( self.news_collection, {"news_url_uid": str(unqUrl)}, {}, QueryType.one) if (chkIsExists): print("Already saved. url - ( " + url + " )") continue publish_date_data = news.find( 'span', {'class': 'views-field views-field-field-date'}) publish_date = Helper.parse_date( publish_date_data.text ) if publish_date_data and publish_date_data.text != '' else '' url_response = crawler.MakeRequest( url, 'Get', postData=self.body, headers=self.headers) url_soup = BeautifulSoup(url_response.content, 'html.parser') description_data = url_soup.find( 'main', { 'role': "main" } ).find( 'div', { 'class': 'field field--name-body field--type-text-with-summary field--label-hidden field__item' }) description = [] regex = re.compile(r'[\n\xa0]') for desc in description_data.find_all('p'): description.append(regex.sub("", str(desc.text))) description = ''.join(description) news_dict.update({ "title": title, "news_title_uid": hashlib.md5(title.encode()).hexdigest(), "url": url, "link": url, "news_url_uid": hashlib.md5(url.encode()).hexdigest(), "description": description, "text": description, "publishedAt": publish_date, 'date': publish_date, "publishedAt_scrapped": publish_date, "company_id": "aruplab", "ticker": "aruplab_scrapped", "industry_name": "aruplab", "news_provider": "aruplab" }) bulk_obj.insert(news_dict) if len(bulk_obj._BulkOperationBuilder__bulk. __dict__['ops']) > 100: bulk_obj.execute() bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 0: bulk_obj.execute() page += 1 else: print("All news has been scrapped !!") loop = False except Exception as e: self.logger.error(f"Error Occured : \n", exc_info=True)
def update_evo(cik, metric, nonperf): df = get_data(cik) # Date range for date axis min_date = df['First Filing Date'].min() max_date = datetime.today() x_axis = Helper.get_months(min_date, max_date) # Compute totals total_count = [len(df) for _ in x_axis] total_value = [df['Loan Amount ($)'].sum() for _ in x_axis] # Counts rep_count = np.array([ df[df['Repossession Date'] <= t]['Repossession Date'].count() for t in x_axis ]) del_count = np.array([ df[(df['30 Days Delinquency Date'] <= t) & (df['Repossession Date'].isnull())] ['30 Days Delinquency Date'].count() for t in x_axis ]) rep_countdiff = np.insert(np.diff(rep_count), 0, 0) del_countdiff = np.insert(np.diff(del_count), 0, 0) # Percentages rep_countper = rep_count / total_count del_countper = del_count / total_count # Value-based rep_val = np.array([ df[df['Repossession Date'] <= t]['Loan Amount ($)'].sum() for t in x_axis ]) del_val = np.array([ df[(df['30 Days Delinquency Date'] <= t) & (df['Repossession Date'].isnull())]['Loan Amount ($)'].sum() for t in x_axis ]) rep_valdiff = np.insert(np.diff(rep_val), 0, 0) del_valdiff = np.insert(np.diff(del_val), 0, 0) # Percentages rep_valper = rep_val / total_value del_valper = del_val / total_value # Assign axes if metric == 'Total count': y_1 = del_count y_2 = rep_count y_1d = del_countdiff y_2d = rep_countdiff y_1p = del_countper y_2p = rep_countper else: y_1 = del_val y_2 = rep_val y_1d = del_valdiff y_2d = rep_valdiff y_1p = del_valper y_2p = rep_valper trace_1 = dict(name='Non-performing', x=x_axis, y=y_1, type='line', line=dict(color=colors['amber'], width=5)) trace_2 = dict(name='Repossessed', x=x_axis, y=y_2, type='line', line=dict(color=colors['red'], width=5)) trace_1d = dict(name='Non-perf Change', x=x_axis, y=y_1d, type='bar', line=dict(color=colors['amber'])) trace_2d = dict(name='Repo Change', x=x_axis, y=y_2d, type='bar', line=dict(color=colors['red'])) traces = [] if 'D' in nonperf: traces.append(trace_1) traces.append(trace_1d) if 'R' in nonperf: traces.append(trace_2) traces.append(trace_2d) layout = go.Layout(font=dict(family=font_family, size=14), autosize=True, height=550, margin={ 't': 50, 'l': 100 }, xaxis=dict(showline=True, mirror=True, ticks='outside', zeroline=False), yaxis=dict(title=f'<b>{metric}</b>', showline=True, mirror=True, ticks='outside', zeroline=False, tickformat=Helper.get_format(metric)), showlegend=True, legend=dict(orientation='h')) return dict(data=traces, layout=layout)
def crawler_news(self): loop = True page = 1 while loop: response = crawler.MakeRequest(self.url.format(page=page), 'Get', postData=self.body, headers=self.headers) if (response.status_code == 200): soup = BeautifulSoup(response.content, 'html.parser') else: break bulk_obj = DbOperations.Get_object_for_bulkop(False, 'titanx_news') news_data = soup.find('div', { 'class': "x-main full" }).find_all('div', {'class': 'x-container max width'}) if news_data: for news in news_data: news_dict = Helper.get_news_dict() title_data = news.find('h2', {'class': 'entry-title'}) title = title_data.text.strip() if title_data else "" url_data = news.find('a', {'href': True}) url = url_data['href'] if url_data else '' publish_date_data = news.find('time', {'class': 'entry-date'}) publish_date = Helper.parse_date( publish_date_data.text ) if publish_date_data and publish_date_data.text != '' else '' url_response = crawler.MakeRequest(url, 'Get', postData=self.body, headers=self.headers) url_soup = BeautifulSoup(url_response.content, 'html.parser') description_data = url_soup.find( 'div', {'class': "entry-content content"}) description = [] regex = re.compile(r'[\n\xa0]') if description_data.h2 != None: for desc in description_data.h2.find_all_previous( "p")[::-1]: description.append(regex.sub("", str(desc.text))) else: for desc in description_data.find_all('p'): description.append(regex.sub("", str(desc.text))) description = ''.join(description) print(description) print(title) print(url) print(publish_date) news_dict.update({ "title": title, "news_title_uid": hashlib.md5(title.encode()).hexdigest(), "url": url, "link": url, "news_url_uid": hashlib.md5(url.encode()).hexdigest(), "description": description, "text": description, "publishedAt": publish_date, 'date': publish_date, "publishedAt_scrapped": publish_date, "company_id": "titanx", "ticker": "titanx_scrapped", "industry_name": "titanx", "news_provider": "titanx" }) bulk_obj.insert(news_dict) if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 100: bulk_obj.execute() bulk_obj = DbOperations.Get_object_for_bulkop( False, 'titanx_news') if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 0: bulk_obj.execute() page += 1 else: print("News Not Found") loop = False
def get_data(trust_cik):
    return Helper.load_autoloans_by_cik([trust_cik])
class Game: def __init__(self): # Declaring Member variables self.grid = None self.UPDATE_RATE = None self.NUM_OF_GENERATIONS = None # Helper class contains some handy utilities self.helper = Helper() def startGame(self): ''' This function is the epicenter of action! It commands the flow of the program. ''' self.helper.clearConsole() self.getInput() for i in range(self.NUM_OF_GENERATIONS): self.helper.clearConsole() print(f"Generation {i+1}") self.updateGrid() self.helper.printGrid(self.grid) print("\n\n\nPress <Ctrl+C> to exit early", end='') time.sleep(1. / self.UPDATE_RATE) def getInput(self): ''' User is allowed to choose a pattern to start the game ''' print("Welcome, Player!\n") patterns = '''Below is the list of available patterns: >> block >> beehive >> loaf >> boat >> tub >> blinker >> toad >> beacon >> glider >> lwss (stands for Light-weight spaceship) >> mwss (stands for Middle-weight spaceship) >> hwss (stands for Heavy-weight spaceship) ''' print(patterns) user_pattern = input( "Please type the name of a pattern (Press <Enter> to select the default= Glider): " ) or "glider" try: with open("config.json") as file: config = json.load(file) self.UPDATE_RATE = config["update_rate"] self.NUM_OF_GENERATIONS = config["num_of_generations"] self.grid = np.array(config[user_pattern.lower()]) print("\n\nSuccess! Values loaded from config") time.sleep(1) except: print("\n\nERROR: Either typo in entered pattern name or " + "error in config.json. Proceeding with default values!") self.UPDATE_RATE = 5 # Hz self.NUM_OF_GENERATIONS = 1000 self.grid = np.array([ # Glider [1, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0] ]) time.sleep(3) return def updateGrid(self): ''' KERNEL = [[1,1,1], [1,0,1], [1,1,1]] Convolving with this efficiently provides us the neighbor count for all cells ''' KERNEL = np.ones((3, 3)) KERNEL[1, 1] = 0 # Wrap around padding: any pattern going outside the # grid will wrap around from the other side left_col = self.grid[:, [-1]] right_col = self.grid[:, [0]] grid_copy = np.hstack((left_col, self.grid, right_col)) top_row = grid_copy[-1, :] bottom_row = grid_copy[0, :] grid_copy = np.vstack((top_row, grid_copy, bottom_row)) neighbour_grid = self.helper.conv2d(grid_copy, KERNEL) # Unsustainable neighbors, definitely dead dead_mask = np.bitwise_or(neighbour_grid < 2, neighbour_grid > 3) self.grid[dead_mask] = 0 # Resurrecting neighbors, already live cells unaffected # Dead cells get revived resurrect_mask = neighbour_grid == 3 self.grid[resurrect_mask] = 1 return
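# One plausible implementation of the Helper.conv2d used by updateGrid above
# (an assumption, not the repository's code): a 'valid' 2-D convolution, so the
# wrap-padded grid shrinks back to the original shape while each cell gets its
# eight-neighbour count.
import numpy as np
from scipy.signal import convolve2d

def conv2d(grid, kernel):
    return convolve2d(grid, kernel, mode='valid')

kernel = np.ones((3, 3)); kernel[1, 1] = 0
padded = np.pad(np.zeros((8, 8)), 1, mode='wrap')
assert conv2d(padded, kernel).shape == (8, 8)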
def clustering(self, houses, batteries, counter):
    '''
    Use K-means to cluster and assign houses to batteries
    '''
    # count clustering iterations
    counter += 1

    # capacitated clustering
    greedy = Greedy(houses, batteries, self.greedy)
    houses = copy.deepcopy(greedy.houses)
    batteries = copy.deepcopy(greedy.batteries)

    # Save solution
    solution = Solution(copy.deepcopy(houses), copy.deepcopy(batteries))
    self.solutions.append(solution)

    for battery in batteries:
        houses = Sort.distance(Sort, houses, battery)

    # calculate cable costs
    cable_costs = Helper.houses_costs(Helper, batteries, houses)
    battery_costs = 0
    for battery in batteries:
        battery_costs += battery.cost
    costs = cable_costs + battery_costs

    # for each cluster, the new centre is the mean of all connected houses' coordinates
    for battery in batteries:
        x = 0
        y = 0
        count = 0
        for house in battery.connected:
            x += house.x
            y += house.y
            count += 1

        # Avoid dividing by zero when a battery is not connected to any house (HAC)
        if count != 0:
            # average
            mean_x = round(x / count)
            mean_y = round(y / count)

            # new centre
            battery.x = mean_x
            battery.y = mean_y

    # Stop when the costs haven't improved
    if costs < self.costs:
        self.costs = costs

        # disconnect
        for battery in batteries:
            battery.connected = []
            battery.current_usage = 0
        for house in houses:
            house.connected = False
            house.connection = False
            house.costs = 0

        # --> Solution
        self.batteries = batteries
        self.houses = houses

        # try again
        self.clustering(houses, batteries, counter)
    else:
        pass
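# Toy illustration (made-up coordinates, not the project's data) of the centroid
# update performed above: each battery moves to the mean (x, y) of the houses
# currently connected to it.
connected = [(1, 2), (3, 4), (5, 6)]
mean_x = round(sum(x for x, _ in connected) / float(len(connected)))
mean_y = round(sum(y for _, y in connected) / float(len(connected)))
assert (mean_x, mean_y) == (3, 4)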
                    bulk_obj.execute()
                page += 1
            else:
                print("All news has been scraped !!")
                loop = False
    except Exception as e:
        self.logger.error("Error occurred : \n", exc_info=True)


# Create and configure logger
logging.basicConfig(filename="news_scraping_logs.log",
                    format='%(asctime)s %(message)s',
                    filemode='w')
logger = logging.getLogger()

url = "https://investor.harvardbioscience.com/press-releases?field_nir_news_date_value%5Bmin%5D=2020&field_nir_news_date_value%5Bmax%5D=2020&items_per_page=10&page={page}"
news_obj = harvardbioscience(url, logger=logger)
news_obj.crawler_news()

news_collection = Helper.getNewsCollection()
processed_collection = Helper.getProcessNewsCollection()
news_log_collection = Helper.getLogCollection()

isInserted, rowCount = Helper.processNews(news_collection, processed_collection, 'harvardbioscience')
print('Total rows added to the processed collection => ' + str(rowCount))

# UPDATING LOG COLLECTION
if (isInserted):
    Helper.makeLog(news_log_collection, processed_collection, 'harvardbioscience')
    count = 0.0
    for num in AUCs:
        count = count + num
    AUC = count / AUCs.__len__()
    return Recall, AUC


if __name__ == '__main__':
    embedding_size = Const.embedding_size
    drop_ratio = Const.drop_ratio
    epoch = Const.epoch
    batch_size = Const.batch_size

    data = Data()
    h = Helper()
    num_users = data.get_user_size()
    num_items = data.get_item_size()
    hrm = HRM(num_users, num_items, embedding_size, drop_ratio)
    # print(hrm)

    lr_flag = True
    pre_mean_loss = 999
    lr = Const.lr
    for i in range(0, epoch):
        hrm.train()
        # training start time
        t1 = time()
        if lr_flag:
            lr *= 1.1
        else:
def crawler(self): try: page = 1 bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) while True: response = crawler.MakeRequest(self.url.format(page=page), "Get", headers=self.headers) if 'we did not find any results related' in response.text: break soup = BeautifulSoup(response.content, "html.parser") boxs = soup.find_all("li", {"class": 'utc-cards--item'}) for box in boxs: date = box.find("time", {"class": "utc-card--date"}).text if date: date = Helper.parse_date(date) if date.year < datetime.datetime.now().year: break datadict = Helper.get_news_dict() datadict.update( {"url": "https://www.rtx.com" + box.find("a")['href']}) description = self.fetchDescription("https://www.rtx.com" + box.find("a")['href']) url = "https://www.rtx.com" + box.find("a")['href'] # Check if already present unqUrl = hashlib.md5(url.encode()).hexdigest() chkIsExists = DbOperations.GetData( self.news_collection, {"news_url_uid": str(unqUrl)}, {}, QueryType.one) if (chkIsExists): print("Already saved. url - ( " + url + " )") continue datadict.update({ "date": date, "news_provider": "UNITED TECHNOLOGIES CORPORATION", "formatted_sub_header": box.find("a").text, "publishedAt": date, "description": description, "title": box.find("a").text, "link": "https://www.rtx.com" + box.find("a")['href'], "text": description, "company_id": "rtx", "news_url_uid": hashlib.md5( ("https://www.rtx.com" + box.find("a")['href']).encode()).hexdigest() }) bulk_obj.insert(datadict) if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops'] ) > 100: bulk_obj.execute() bulk_obj = DbOperations.Get_object_for_bulkop( False, self.news_collection) if len(bulk_obj._BulkOperationBuilder__bulk.__dict__['ops']) > 0: bulk_obj.execute() except Exception as e: self.logger.error(f"Error Occured : \n", exc_info=True)