def backup_releases(args, repo_cwd, repository, repos_template,
                    include_assets=False):
    repository_fullname = repository['full_name']

    # give release files somewhere to live & log intent
    release_cwd = os.path.join(repo_cwd, 'releases')
    log_info('Retrieving {0} releases'.format(repository_fullname))
    mkdir_p(repo_cwd, release_cwd)

    query_args = {}
    release_template = '{0}/{1}/releases'.format(repos_template,
                                                 repository_fullname)
    releases = retrieve_data(args, release_template, query_args=query_args)

    # for each release, store it
    log_info('Saving {0} releases to disk'.format(len(releases)))
    for release in releases:
        release_name = release['tag_name']
        output_filepath = os.path.join(release_cwd,
                                       '{0}.json'.format(release_name))
        with codecs.open(output_filepath, 'w+', encoding='utf-8') as f:
            json_dump(release, f)

        if include_assets:
            assets = retrieve_data(args, release['assets_url'])
            for asset in assets:
                # download each release asset next to the release JSON
                download_file(asset['url'],
                              os.path.join(release_cwd, asset['name']),
                              get_auth(args))
def retrieve():
    list_link = "{}{}".format(BASE_URL, BOOK_LIST)
    html = retrieve_data("goodreads.top-books.html", list_link)
    soup = bs(html, "html.parser")
    rows = soup.find_all("tr", {"itemtype": "http://schema.org/Book"})
    for row in rows[:100]:
        link = row.find("div", {"data-resource-type": "Book"}).a["href"]
        book_link = "{}{}".format(BASE_URL, link)
        fname = "{}.{}.html".format("goodreads", link_to_fname(link))
        print("Fetching {}...".format(book_link))
        html = retrieve_data(fname, book_link)
        try:
            soup = bs(html, "html.parser")
            title = soup.find("h1", {"id": "bookTitle"}).get_text()
            title = clean_whitespace(title)
            description = soup.select("div#description span")[-1].get_text()
            description = clean_whitespace(description)
            # the buy-button href replaces the relative book link
            link = soup.find("a", {"id": "buyButton"})["href"]
            genres = soup.select(".left .bookPageGenreLink")
            genres = [clean_whitespace(genre.get_text()) for genre in genres]
            image = soup.find("img", {"id": "coverImage"})["src"]
            if not image.startswith("http"):
                image = "{}{}".format(BASE_URL, image)
            product = Product(title, "{}{}".format(BASE_URL, link), image,
                              "books", genres, description)
            product.dump()
        except Exception as e:
            print("ERROR:", e)
        print("")
def backup_milestones(username, password, repo_cwd, repository, repos_template):
    milestone_cwd = os.path.join(repo_cwd, 'milestones')
    # if args.skip_existing and os.path.isdir(milestone_cwd):
    #     return
    logger.info(f"Retrieving {repository['full_name']} milestones")
    mkdir_p(repo_cwd, milestone_cwd)

    template = f"{repos_template}/{repository['full_name']}/milestones"
    query_args = {'state': 'all'}

    _milestones = retrieve_data(username, password, template,
                                query_args=query_args)

    milestones = {}
    for milestone in _milestones:
        milestones[milestone['number']] = milestone

    logger.info(f'Saving {len(milestones)} milestones to disk')
    for number, milestone in milestones.items():
        # write each milestone into the milestones directory, keyed by number
        milestone_file = f'{milestone_cwd}/{number}.json'
        with codecs.open(milestone_file, 'w', encoding='utf-8') as f:
            json_dump(milestone, f)
def get_scene_graph_of_image(id=61512):
    """
    Get Scene Graph of an image.
    """
    image = get_image_data(id=id)
    data = utils.retrieve_data('/api/v0/images/' + str(id) + '/graph')
    if 'detail' in data and data['detail'] == 'Not found.':
        return None
    return utils.parse_graph(data, image)
def get_region_descriptions_of_image(id=61512):
    """
    Get the region descriptions of an image.
    """
    image = get_image_data(id=id)
    data = utils.retrieve_data('/api/v0/images/' + str(id) + '/regions')
    if 'detail' in data and data['detail'] == 'Not found.':
        return None
    return utils.parse_region_descriptions(data, image)
def get_image_data(id=61512):
    """
    Get data about an image.
    """
    data = utils.retrieve_data('/api/v0/images/' + str(id))
    if 'detail' in data and data['detail'] == 'Not found.':
        return None
    image = utils.parse_image_data(data)
    return image
def get_region_graph_of_region(image_id=61512, region_id=1):
    """
    Get Region Graph of a particular Region in an image.
    """
    image = get_image_data(id=image_id)
    data = utils.retrieve_data('/api/v0/images/' + str(image_id) +
                               '/regions/' + str(region_id))
    if 'detail' in data and data['detail'] == 'Not found.':
        return None
    return utils.parse_graph(data[0], image)
def get_image_ids_in_range(start_index=0, end_index=99):
    """
    Get Image ids from start_index to end_index.
    """
    ids_per_page = 1000
    start_page = start_index // ids_per_page + 1
    end_page = end_index // ids_per_page + 1
    ids = []
    for page in range(start_page, end_page + 1):
        data = utils.retrieve_data('/api/v0/images/all?page=' + str(page))
        ids.extend(data['results'])
    # drop ids before start_index on the first page, then cap the count
    ids = ids[start_index % ids_per_page:]
    ids = ids[:end_index - start_index + 1]
    return ids
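# Usage sketch (not from the original module): a request that spans a page
# boundary exercises both the page loop and the modulo slicing above. The
# endpoint serves 1000 ids per page, so ids 995..1005 touch pages 1 and 2,
# and the result is trimmed to end_index - start_index + 1 entries.
def _example_get_ids_across_pages():
    ids = get_image_ids_in_range(start_index=995, end_index=1005)
    assert len(ids) == 11  # 1005 - 995 + 1
    return ids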
def retrieve_products_for_interest(interest):
    list_url = "{}{}/{}-gifts{}".format(BASE_URL, LIST_URL, interest, QUERY_STR)
    html = retrieve_data("uncommon-goods.{}.html".format(interest), list_url)
    soup = bs(html, "html.parser")
    prod_links = [link["href"] for link in soup.select("article.product a")]
    for link in prod_links[:100]:
        prod_link = "{}{}".format(BASE_URL, link)
        fname = "{}.{}.html".format("uncommon-goods", link_to_fname(link))
        print("Fetching {}...".format(prod_link))
        html = retrieve_data(fname, prod_link)
        soup = bs(html, "html.parser")
        try:
            title = soup.find("h1", {"itemprop": "name"}).get_text()
            title = clean_whitespace(title)
            description = soup.select_one(".theStoryCopy p").get_text()
            description = clean_whitespace(description)
            image = soup.select_one("a#mainImage img")["src"]
            if not image.startswith("http"):
                image = "{}{}".format(BASE_URL, image)
            price = soup.find("span", {"itemprop": "price"}).get_text()
            price = float(clean_whitespace(price))
            tags = get_tags(description)
            product = Product(title, "{}{}".format(BASE_URL, link), image,
                              interest, tags, description, price=price)
            product.dump()
        except Exception as e:
            print("ERROR:", e)
        print("")
def get_all_image_ids():
    """
    Get all Image ids.
    """
    page = 1
    _next = '/api/v0/images/all?page=' + str(page)
    ids = []
    while True:
        data = utils.retrieve_data(_next)
        ids.extend(data['results'])
        if data['next'] is None:
            break
        page += 1
        _next = '/api/v0/images/all?page=' + str(page)
    return ids
def retrieve():
    list_link = "{}{}".format(BASE_URL, FILM_LIST)
    html = retrieve_data("imdb.top-films.html", list_link)
    soup = bs(html, "html.parser")
    film_links = soup.select("tbody.lister-list tr .titleColumn a")
    film_links = [link["href"] for link in film_links]
    for link in film_links[:100]:
        film_link = "{}{}".format(BASE_URL, link)
        fname = "{}.{}.html".format("imdb", link_to_fname(link))
        print("Fetching {}...".format(film_link))
        html = retrieve_data(fname, film_link)
        soup = bs(html, "html.parser")
        try:
            title = soup.select_one(".title_wrapper h1").get_text()
            title = clean_whitespace(title)
            description = soup.select_one(".plot_summary .summary_text")
            description = clean_whitespace(description.get_text())
            image = soup.select_one(".poster a img")["src"]
            if not image.startswith("http"):
                image = "{}{}".format(BASE_URL, image)
            # the watch-option href replaces the relative IMDb link
            link = soup.select_one(".winner-option.watch-option")["data-href"]
            genres = soup.select(".title_wrapper .subtext a[href^=\"/genre\"]")
            genres = [clean_whitespace(genre.get_text()) for genre in genres]
            product = Product(title, "{}{}".format(BASE_URL, link), image,
                              "films", genres, description)
            product.dump()
        except Exception as e:
            print("ERROR:", e)
        print("")
def _backup_data(username, password, name, template, output_file,
                 output_directory, overwrite=True):
    # skip_existing = args.skip_existing
    if overwrite:
        logger.info(f'Retrieving {username} {name}')
        mkdir_p(output_directory)
        data = retrieve_data(username, password, template)
        logger.info(f'Writing {len(data)} {name} to disk')
        with codecs.open(output_file, 'w', encoding='utf-8') as f:
            json_dump(data, f)
def get_QA_of_image(id=61512):
    """
    Get all QAs for a particular image.
    """
    page = 1
    _next = '/api/v0/image/' + str(id) + '/qa?page=' + str(page)
    qas = []
    image_map = {}
    while True:
        data = utils.retrieve_data(_next)
        for d in data['results']:
            if d['image'] not in image_map:
                image_map[d['image']] = get_image_data(id=d['image'])
        qas.extend(utils.parse_QA(data['results'], image_map))
        if data['next'] is None:
            break
        page += 1
        _next = '/api/v0/image/' + str(id) + '/qa?page=' + str(page)
    return qas
def interactive_visual():
    X_train, Y_train, X_test, Y_test = retrieve_data()
    model = keras.models.load_model('Deep_NN_MNIST')
    # model = keras.models.load_model('Dropout_NN_mnist')
    evaluation = model.evaluate(X_test, Y_test)
    print('accuracy on test set: ' + str(evaluation[1]))
    plt.ion()
    while True:
        # randint is inclusive on both ends, so cap at shape[0] - 1
        rand = random.randint(0, X_test.shape[0] - 1)
        prediction = int(predict(model, X_test[rand]))
        print('NN prediction: ' + str(prediction))
        plt.imshow(np.reshape(X_test[rand], (28, 28)))
        x = input('press enter to display new image or type exit: ')
        if x == 'exit':
            plt.close()
            break
        elif x == '':
            # clear the figure so the next digit replaces this one
            plt.clf()
            plt.draw()
def main():
    (train_x, train_y), (test_x, test_y) = retrieve_data()
    start_time = time.time()

    # Build and evaluate all three models for the data
    slnn = single_layer_neural_network(train_x, train_y, test_x, test_y)
    mlff = multi_layer_feed_forward(train_x, train_y, test_x, test_y)
    cnn = convolutional_neural_network(train_x, train_y, test_x, test_y)

    # Print the metrics of each model on the data
    print(f"Achieved {slnn}% Accuracy with Single-Layer Neural Network.")
    print(
        f"Achieved {mlff[1] * 100.0}% Accuracy with Multi-Layer Feed Forward Neural Network."
    )
    print(
        f"Achieved {cnn[1] * 100.0}% Accuracy with Convolutional Neural Network."
    )
    print(
        f"Finished building and training all Neural Networks in {time.time() - start_time}s."
    )
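# retrieve_data() itself is not shown in this snippet. A minimal sketch of
# what it plausibly does, assuming the standard keras.datasets MNIST loader
# with pixel values rescaled to [0, 1]; the exact preprocessing is an
# assumption, not the original implementation.
def retrieve_data_sketch():
    from tensorflow import keras
    (train_x, train_y), (test_x, test_y) = keras.datasets.mnist.load_data()
    train_x = train_x.astype('float32') / 255.0
    test_x = test_x.astype('float32') / 255.0
    return (train_x, train_y), (test_x, test_y)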
def get_all_QAs(qtotal=100):
    """
    Gets all the QA from the dataset.
    qtotal: int    total number of QAs to return.
                   Set to None if all QAs should be returned
    """
    page = 1
    _next = '/api/v0/qa/all?page=' + str(page)
    qas = []
    image_map = {}
    while True:
        data = utils.retrieve_data(_next)
        for d in data['results']:
            if d['image'] not in image_map:
                image_map[d['image']] = get_image_data(id=d['image'])
        qas.extend(utils.parse_QA(data['results'], image_map))
        if qtotal is not None and len(qas) > qtotal:
            return qas
        if data['next'] is None:
            break
        page += 1
        _next = '/api/v0/qa/all?page=' + str(page)
    return qas
def get_QA_of_type(qtype='why', qtotal=100):
    """
    Get all QA's of a particular type - example, 'why'
    qtype: string    possible values: what, where, when, why, who, how.
    qtotal: int      total number of QAs to return.
                     Set to None if all QAs should be returned
    """
    page = 1
    _next = '/api/v0/qa/' + qtype + '?page=' + str(page)
    qas = []
    image_map = {}
    while True:
        data = utils.retrieve_data(_next)
        for d in data['results']:
            if d['image'] not in image_map:
                image_map[d['image']] = get_image_data(id=d['image'])
        qas.extend(utils.parse_QA(data['results'], image_map))
        if qtotal is not None and len(qas) > qtotal:
            return qas
        if data['next'] is None:
            break
        page += 1
        _next = '/api/v0/qa/' + qtype + '?page=' + str(page)
    return qas
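# Usage sketch for the two QA helpers above. Note that qtotal is an
# approximate cap: whole result pages are appended before the limit is
# checked, so slice the returned list if an exact count is needed.
def _example_fetch_why_questions():
    qas = get_QA_of_type(qtype='why', qtotal=50)
    return qas[:50]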
if __name__ == "__main__": client = Client(config.api_key, config.secret_key) trade_pairs = ["BTCEUR", "ETHEUR", "DOGEEUR", "XRPEUR"] kline_size = "15m" windows_short = np.linspace(5, 200, 50).astype(int) windows_long = np.linspace(15, 400, 50).astype(int) if not os.path.exists(result_folder): os.makedirs(result_folder) for pair in trade_pairs: data = utils.retrieve_data(client, pair, kline_size, start="2020-01-01") perform_backtesting(pair, kline_size, data, windows_short, windows_long) windows_short = {"BTCEUR": 20, "ETHEUR": 46, "DOGEEUR": 50, "XRPEUR": 24} windows_long = { "BTCEUR": 240, "ETHEUR": 330, "DOGEEUR": 100, "XRPEUR": 400 } for pair in trade_pairs: data = utils.retrieve_data(client, pair,
print('Processing categories :')
for c in categories:
    print(' -', c)
    cat_dir = join(path, c)
    sites = []
    files = [f for f in listdir(cat_dir) if isfile(join(cat_dir, f))]
    for filename in files:
        print(' -', filename)
        with open(join(cat_dir, filename), 'r') as f:
            try:
                data = json.load(f)
            except json.JSONDecodeError:
                print('!!! Error in file', filename)
            else:
                data = retrieve_data(data)
                data = preprocess(data)
                # strip the file extension to recover the website name
                data['website'] = filename.split('/')[-1][:-4]
                sites.append(data)
    d = {
        'cat_name': c,
        'sites': sites,
        'average': compute_average_site(sites, c)
    }
    out['categories'].append(d)

# Saving
with open('data.json', 'w') as f:
    json.dump(out, f)
def backup_pulls(username, password, repo_cwd, repository, repos_template):
    # has_pulls_dir = os.path.isdir('{0}/pulls/.git'.format(repo_cwd))
    # if args.skip_existing and has_pulls_dir:
    #     return
    logger.info(f"Retrieving {repository['full_name']} pull requests")  # noqa
    pulls_cwd = os.path.join(repo_cwd, 'pulls')
    mkdir_p(repo_cwd, pulls_cwd)

    pulls = {}
    pulls_template = f"{repos_template}/{repository['full_name']}/pulls"
    logger.info(f"Pull template is {pulls_template}")
    query_args = {
        'filter': 'all',
        'state': 'all',
        'sort': 'updated',
        'direction': 'desc',
    }

    # if not args.include_pull_details:
    #     pull_states = ['open', 'closed']
    #     for pull_state in pull_states:
    #         query_args['state'] = pull_state
    #         _pulls = retrieve_data_gen(args, _pulls_template,
    #                                    query_args=query_args)
    #         for pull in _pulls:
    #             if args.since and pull['updated_at'] < args.since:
    #                 break
    #             if not args.since or pull['updated_at'] >= args.since:
    #                 pulls[pull['number']] = pull
    # else:
    _pulls = retrieve_data_gen(username, password, pulls_template,
                               query_args=query_args)
    for pull in _pulls:
        # if args.since and pull['updated_at'] < args.since:
        #     break
        # if not args.since or pull['updated_at'] >= args.since:
        pulls[pull['number']] = retrieve_data(
            username, password,
            pulls_template + '/{}'.format(pull['number']),
            single_request=True)[0]

    logger.info('Saving {0} pull requests to disk'.format(
        len(list(pulls.keys()))))
    comments_template = pulls_template + '/{0}/comments'
    commits_template = pulls_template + '/{0}/commits'
    for number, pull in list(pulls.items()):
        # if args.include_pull_comments or args.include_everything:
        template = comments_template.format(number)
        pulls[number]['comment_data'] = retrieve_data(username, password,
                                                      template)
        # if args.include_pull_commits or args.include_everything:
        template = commits_template.format(number)
        pulls[number]['commit_data'] = retrieve_data(username, password,
                                                     template)

        pull_file = '{0}/{1}.json'.format(pulls_cwd, number)
        with codecs.open(pull_file, 'w', encoding='utf-8') as f:
            json_dump(pull, f)
def backup_issues(username, password, repo_cwd, repository, repos_template,
                  since=None):
    # has_issues_dir = os.path.isdir('{0}/issues/.git'.format(repo_cwd))
    # if args.skip_existing and has_issues_dir:
    #     return
    logger.info('Retrieving {0} issues'.format(repository['full_name']))
    issue_cwd = os.path.join(repo_cwd, 'issues')
    mkdir_p(repo_cwd, issue_cwd)

    issues = {}
    issues_skipped = 0
    issues_skipped_message = ''
    _issue_template = '{0}/{1}/issues'.format(repos_template,
                                              repository['full_name'])

    should_include_pulls = True
    issue_states = ['open', 'closed']
    for issue_state in issue_states:
        query_args = {'filter': 'all', 'state': issue_state}
        # 'since' is the timestamp after which everything shall be scraped
        if since:
            query_args['since'] = since

        _issues = retrieve_data(username, password, _issue_template,
                                query_args=query_args)
        for issue in _issues:
            # skip pull requests, which are also returned as issues,
            # if retrieving pull requests is requested as well
            if 'pull_request' in issue:
                issues_skipped += 1
                continue
            issues[issue['number']] = issue

    if issues_skipped:
        issues_skipped_message = ' (skipped {0} pull requests)'.format(
            issues_skipped)

    logger.info('Saving {0} issues to disk{1}'.format(
        len(list(issues.keys())), issues_skipped_message))
    comments_template = _issue_template + '/{0}/comments'
    events_template = _issue_template + '/{0}/events'
    for number, issue in list(issues.items()):
        # if args.include_issue_comments or args.include_everything:
        template = comments_template.format(number)
        issues[number]['comment_data'] = retrieve_data(username, password,
                                                       template)
        # if args.include_issue_events or args.include_everything:
        template = events_template.format(number)
        issues[number]['event_data'] = retrieve_data(username, password,
                                                     template)

        issue_file = '{0}/{1}.json'.format(issue_cwd, number)
        with codecs.open(issue_file, 'w', encoding='utf-8') as f:
            json_dump(issue, f)
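# Hypothetical call site for backup_issues (values are illustrative, not from
# the original code): 'since' is an ISO-8601 timestamp forwarded to the GitHub
# issues endpoint, so only issues updated after that instant are fetched.
def _example_backup_recent_issues(repository):
    backup_issues('octocat', 'personal-access-token',
                  repo_cwd='/backups/octocat/Hello-World',
                  repository=repository,
                  repos_template='https://api.github.com/repos',
                  since='2021-01-01T00:00:00Z')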
from utils import save, retrieve_data

data = retrieve_data("backup.txt")
save(data, "resume_data.txt")
def start_trading_bot(client, trade_pairs, kline_size, windows_short,
                      windows_long):
    #### Initialization ####
    DF_dict = {}
    positions = {}
    for symbol in trade_pairs:
        # pull initial dataframes
        utils.delete_data(symbol, kline_size)
        DF_dict[symbol] = utils.retrieve_data(client, symbol, kline_size,
                                              save=True, start="2021-04-13")
        # get information about current investments: a holding worth more
        # than 3 EUR counts as an open position
        bal = utils.get_currency_balance(client, symbol)
        if (float(bal) * float(DF_dict[symbol]["close"].iloc[-1])) > 3:
            positions[symbol] = True
        else:
            positions[symbol] = False
    print(positions)

    #### Actual bot ####
    while True:
        log.info(
            f'########################### Next Iteration: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} ########################### '
        )
        start = time.time()
        for symbol in trade_pairs:
            log.info(
                f"########################### {symbol} ###########################"
            )
            # update data; the try/except catches the API occasionally resetting
            try:
                DF_dict[symbol] = utils.update_data(client, symbol,
                                                    kline_size, save=True)
            except Exception as e:
                print(e)
                log.info(
                    "Data pull error on Binance side, waiting 15 minutes to reconnect"
                )
                break

            # calculate rolling windows and z-score
            DF_dict[symbol] = utils.make_rolling_and_score(
                DF_dict[symbol], windows_short[symbol], windows_long[symbol])
            print(DF_dict[symbol].tail(5))
            current_opportunity = DF_dict[symbol]["score"].iloc[-1]
            log.info(f"Z-Score of {symbol}: {current_opportunity}")

            # get account information like balance etc.
            try:
                bal = utils.get_currency_balance(client, symbol)
            except Exception as e:
                print(e)
                log.info(
                    "Data pull error on Binance side, waiting 15 minutes to reconnect"
                )
                break
            log.info(f'current balance of {symbol.split("EUR")[0]}: {bal}')

            # check opportunities and potentially issue an order
            if current_opportunity > 0:
                if not positions[symbol]:
                    # Actual buy function, handle with care!
                    order = client.create_order(symbol=symbol,
                                                side=SIDE_BUY,
                                                type=ORDER_TYPE_MARKET,
                                                quoteOrderQty=300)
                    print(order)
                    positions[symbol] = True
                    log.info(f' market BUY order placed for {symbol} !!!')
            else:
                if positions[symbol]:
                    # Actual sell function, handle with care! Binance rejects
                    # over-precise quantities, so round the free balance with
                    # ever fewer decimals until an order goes through.
                    decimal_place = 15
                    quantity = np.round(
                        float(client.get_asset_balance(
                            asset=symbol.split("EUR")[0])["free"]),
                        decimal_place)
                    while decimal_place > -1:
                        try:
                            order = client.create_order(
                                symbol=symbol,
                                side=SIDE_SELL,
                                type=ORDER_TYPE_MARKET,
                                quantity=quantity)
                            break
                        except Exception:
                            decimal_place -= 1
                            quantity = np.round(
                                float(client.get_asset_balance(
                                    asset=symbol.split("EUR")[0])["free"]),
                                decimal_place)
                    print(order)
                    positions[symbol] = False
                    log.info(f' market SELL order placed for {symbol} !!!')

        end = time.time()
        # sleep for exactly 15 minutes since start
        time.sleep(60 * 15 - (end - start) - 1 / 24)
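# utils.make_rolling_and_score is not shown here. A minimal sketch under the
# assumption that "score" is a z-score-style moving-average crossover signal:
# the gap between a short and a long rolling mean of the close price,
# normalised by the long window's rolling standard deviation. Name and
# formula are assumptions, not the original implementation.
def make_rolling_and_score_sketch(df, window_short, window_long):
    # df is assumed to be a pandas DataFrame with a "close" column
    short_ma = df["close"].rolling(window_short).mean()
    long_ma = df["close"].rolling(window_long).mean()
    long_std = df["close"].rolling(window_long).std()
    df["score"] = (short_ma - long_ma) / long_std
    return df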
import utils
import data

cv_data = utils.retrieve_data("resume_data.txt")
sections = [section for section in cv_data]

working = True
while working:
    q1 = "Which section would you like to access? "
    r1 = utils.option_regulator(sections, q1)
    current_section = True
    while current_section:
        q2 = "What would you like to do in {}? ".format(r1)
        interactions = ["Create", "Edit", "Remove", "View", "Archive"]
        r2 = utils.option_regulator(interactions, q2)
        q3 = "Would you like to {} more entries? ".format(r2.lower())
        while r2 == "Create":
            data.add(cv_data[r1])
            r3 = utils.option_regulator(utils.y_n, q3)
            if r3 == "No":
                break
        while r2 == "Edit":
            data.edit(cv_data[r1])
            r3 = utils.option_regulator(utils.y_n, q3)
            if r3 == "No":
                break
        while r2 == "Remove":
            data.remove(cv_data[r1])
            r3 = utils.option_regulator(utils.y_n, q3)
            if r3 == "No":
                break
from utils import save, retrieve_data

data = retrieve_data("resume_data.txt")
save(data, "backup.txt")
def retrieve_repositories(username, password):
    single_request = False
    template = 'https://{0}/user/repos'.format(get_github_api_host())
    # print(f"Template for retrieve_repos is {template}")
    # else:
    #     if args.private and not args.organization:
    #         log_warning('Authenticated user is different from user being '
    #                     'backed up, thus private repositories cannot be '
    #                     'accessed')
    #     template = 'https://{0}/users/{1}/repos'.format(
    #         get_github_api_host(args), args.user)
    # if args.organization:
    #     organization_repos_template = 'https://{0}/orgs/{1}/repos'.format(
    #         get_github_api_host(args), args.user)
    # If you want to fetch only one repository:
    # repository_template = 'https://{0}/repos/{1}/{2}'.format(
    #     get_github_api_host(args), args.user, args.repository)
    repos = retrieve_data(username, password, template,
                          single_request=single_request)
    # c_pretty_print(repos[0])

    # append starred repos
    starred_template = 'https://{0}/users/{1}/starred'.format(
        get_github_api_host(), username)
    starred_repos = retrieve_data(username, password, starred_template,
                                  single_request=False)
    # flag each repo as starred for downstream processing
    for item in starred_repos:
        item.update({'is_starred': True})
    logger.info("Starred Repos first element")
    # c_pretty_print(starred_repos[0])
    repos.extend(starred_repos)

    # append gists
    gists_template = 'https://{0}/users/{1}/gists'.format(
        get_github_api_host(), username)
    gists = retrieve_data(username, password, gists_template,
                          single_request=False)
    # flag each repo as a gist for downstream processing
    for item in gists:
        item.update({'is_gist': True})
    logger.info("GIST first element")
    # c_pretty_print(gists[0])
    repos.extend(gists)

    # append gists starred by the user
    starred_gists_template = 'https://{0}/gists/starred'.format(
        get_github_api_host())
    starred_gists = retrieve_data(username, password, starred_gists_template,
                                  single_request=False)
    # flag each repo as a starred gist for downstream processing
    for item in starred_gists:
        item.update({'is_gist': True, 'is_starred': True})
    repos.extend(starred_gists)

    return repos
def get_authenticated_user(username, password):
    template = 'https://{0}/user'.format(get_github_api_host())
    logger.info(f'This is the template from authenticated_user {template}')
    data = retrieve_data(username, password, template, single_request=True)
    return data[0]
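# Usage sketch tying the helpers above together. GitHub's API accepts a
# personal access token in place of the password for basic auth; the
# credential values here are placeholders.
def _example_list_repositories():
    username = 'octocat'
    token = 'personal-access-token'
    user = get_authenticated_user(username, token)
    repos = retrieve_repositories(user['login'], token)
    return repos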