def chaikin(comp_idx, comp_name_conv):
    # comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV
    cur_date = pg_query(PSQL.client, "SELECT max(date) FROM portfolio.day_chaikin;")[0].values[0]
    nxt_id, comp_cur = det_cur_chaikin(PSQL)
    total_stocks = len(comp_idx)
    for stock_num, (rh_id) in enumerate(comp_idx):
        progress(stock_num, total_stocks, status = comp_name_conv[rh_id])
        if rh_id in comp_cur.keys() and cur_date <= np.datetime64(comp_cur[rh_id]):
            continue
        comp_data = pg_query(PSQL.client, "SELECT date, adl FROM portfolio.adl_day where rh_id = '%s';" % (rh_id))
        comp_data.rename(columns = {0:'date', 1:'adl'}, inplace = True)
        comp_data.sort_values('date', ascending = True, inplace = True)
        if len(comp_data) < 11:
            continue
        adl_3_ema = calc_ema(comp_data, 3, 'adl')
        adl_10_ema = calc_ema(comp_data, 10, 'adl')
        adl_ema = pd.merge(adl_3_ema, adl_10_ema, left_on = 'date', right_on = 'date')
        adl_ema['chaikin'] = adl_ema['adl_3_ema'] - adl_ema['adl_10_ema']
        for date, chaikin in adl_ema[['date', 'chaikin']].values:
            if rh_id in comp_cur.keys() and date <= np.datetime64(comp_cur[rh_id]):
                continue
            if chaikin != chaikin:
                continue
            if chaikin == np.inf or chaikin == -np.inf:
                continue
            script = "INSERT INTO portfolio.day_chaikin(day_chaikin_id, rh_id, date, chaikin) VALUES (%i, '%s', '%s', %.4f);" % (nxt_id, rh_id, datetime.strptime(str(date).split('.')[0], '%Y-%m-%d %H:%M:%S'), chaikin)
            pg_insert(PSQL.client, script)
            nxt_id += 1
            comp_cur[rh_id] = date
def test_solver(x, y, clf, prev_score, verbose=False): # x, y, clf, scaler, prev_score, verbose = X[features], Y, log_clf, scale, log_checkpoint_score, False print('Searching for best solver.') model = deepcopy(clf) scores = {} all_solvers = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'] cur_solver = 0 for solve in all_solvers: if not verbose: progress(cur_solver, len(all_solvers)) model.set_params(**{'clf__solver': solve}) score = cross_validate(x, y, model.set_params(**{'clf__solver': solve}), only_scores=True, verbose=verbose) # score = test_scaler(model, x, y, verbose = verbose, prev_score = False, prev_scaler = False, skip = False, prog = False) if verbose: print('%s Score: %.5f' % (solve, score)) scores[solve] = {'score': score} cur_solver += 1 if not verbose: progress(cur_solver, len(all_solvers)) best_solver = max(scores, key=lambda x: scores[x]['score']) if scores[best_solver]['score'] > prev_score: print('Using %s.' % (best_solver)) return (model.set_params(**{'clf__solver': best_solver}), scores[best_solver]['score']) else: print('No Improvement, Using Default') return (clf, prev_score)
def pop_times(psql, _last_time):
    # psql, _last_time = PSQL, last_time
    print('Updating times')
    nxt_time, cur_max_time, psql = _det_current_times_(psql)
    # for time_mult in range(0, 1440):
    insert_times = []
    if isinstance(cur_max_time, bool):
        for time_mult in range(0, 1440):
            insert_times.append(_last_time + timedelta(minutes = -30)*time_mult)
    else:
        for time_mult in range(0, 1440):
            if cur_max_time >= _last_time + timedelta(minutes = -30)*time_mult:
                break
            else:
                insert_times.append(_last_time + timedelta(minutes = -30)*time_mult)
    total_times = len(insert_times)
    for time_num, (time) in enumerate(reversed(insert_times)):
        progress(time_num, total_times, status = time)
        script = "insert into binance.times (time_id, q_date, q_time, full_time) VALUES (%i, '%s', '%s', '%s')" % (nxt_time, time.date(), time.time(), time)
        pg_insert(psql.client, script)
        nxt_time += 1
    print('\n')
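# A minimal standalone sketch (with a made-up starting time) of the grid pop_times builds:
# starting from the most recent candle time and stepping back in 30-minute increments,
# 1440 steps cover 30 days of half-hour rows.
from datetime import datetime, timedelta

last_time = datetime(2018, 6, 1, 12, 0)
grid = [last_time + timedelta(minutes=-30) * m for m in range(3)]
print(grid)  # 12:00, 11:30, 11:00 on 2018-06-01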
def pop_exchanges(psql, _last_time, _prices, mrkt_conv, current_mkt_data, time_conv, binance): # psql, _last_time, _prices = PSQL, last_time, prices print('Updating exchanges') total_exchanges = len(_prices) for exchange_num, (mkt) in enumerate([i['symbol'] for i in _prices]): progress(exchange_num, total_exchanges, status = mkt) bootstrap_time = False if mkt not in mrkt_conv.keys(): continue if mrkt_conv[mkt] in current_mkt_data.keys(): if current_mkt_data[mrkt_conv[mkt]] == time_conv[_last_time]: continue else: bootstrap_time = current_mkt_data[mrkt_conv[mkt]] klines = binance.get_historical_klines(mkt, '30m', "2 weeks ago UTC") if len(klines) > 0: mkt_hist = process_kline(klines, time_conv, bootstrap_time) for hist in mkt_hist: script = "insert into binance.exchanges (ex_time_id, ex_market_id, open, high, low, \ close, volume, quote_asset_volume, number_of_trades, taker_buy_base_asset_volume, \ taker_buy_quote_asset_volume) \ VALUES (%i, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" % (hist['open_time'], mrkt_conv[mkt], hist['open'], hist['high'], hist['low'], hist['close'], hist['volume'], hist['quote_asset_volume'], hist['number_of_trades'], hist['taker_buy_base_asset_volume'], hist['taker_buy_quote_asset_volume']) pg_insert(psql.client, script) print('\n')
def feat_selector_optimization(x_, y_):
    # x_, y_ = proc_data[drivers], proc_data[target_col]
    cross_val_scores = {}
    tot_runs = 10 * 7 * 5 * 6
    run_num = 0
    for trees in np.linspace(10, 100, 10):
        for depth in np.linspace(3, 21, 7):
            for samples in np.linspace(.1, .9, 5):
                for feats in np.linspace(.1, 1, 6):
                    run_num += 1
                    _x, _y = deepcopy(x_), deepcopy(y_)
                    estimator = ExtraTreesClassifier(max_features=feats,
                                                     min_samples_split=samples,
                                                     max_depth=int(depth),
                                                     n_estimators=int(trees),
                                                     random_state=53)
                    cross_val_scores[cross_validate(_x, _y, estimator, StandardScaler(), 'logloss')] = {
                        'max_features': feats,
                        'min_samples_split': samples,
                        'max_depth': depth,
                        'n_estimators': trees
                    }
                    progress(run_num, tot_runs)
    print('\n')
    best_params = cross_val_scores[max(cross_val_scores.keys())]
    return (best_params)
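# A small sketch (not part of the original code): the same 10 * 7 * 5 * 6 grid could be
# enumerated with sklearn's ParameterGrid instead of four nested loops. Only the grid
# itself is shown here; the project's cross_validate helper and scoring are not assumed.
import numpy as np
from sklearn.model_selection import ParameterGrid

grid = ParameterGrid({
    'n_estimators': [int(t) for t in np.linspace(10, 100, 10)],
    'max_depth': [int(d) for d in np.linspace(3, 21, 7)],
    'min_samples_split': list(np.linspace(.1, .9, 5)),
    'max_features': list(np.linspace(.1, 1, 6)),
})
print(len(grid))  # 2100 combinations, matching tot_runs = 10 * 7 * 5 * 6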
def pull_tweets(psql, twitter): page = requests.get('https://coinmarketcap.com/all/views/all/') tree = html.fromstring(page.content) coin_name_conv = {v:k for k,v in zip(tree.xpath('//*[@id="currencies-all"]/tbody/tr/td[2]/a/text()'), tree.xpath('//*[@id="currencies-all"]/tbody/tr/td[3]/text()'))} coins = pg_query(psql.client, 'SELECT * FROM binance.coins') use_coins = {i: coin_name_conv[i] for i in coins[1].values if i in coin_name_conv.keys()} coin_id_conv = {v:k for k,v in coins.values} coin_num = 0 total_coins = len(use_coins.keys()) tweets = [] for symbol, name in use_coins.items(): progress(coin_num, total_coins, status = symbol) fetched_tweets = twitter.search(q = '%s|%s' % (symbol.lower(), name.lower()), count = 10, tweet_mode='extended') for tweet in fetched_tweets: # empty dictionary to store required params of a tweet parsed_tweet = {} tweet_data = tweet._json if 'retweeted_status' in tweet_data.keys(): tweet_data = tweet_data['retweeted_status'] parsed_tweet['text'] = clean_tweet(tweet_data['full_text']) parsed_tweet['id'] = tweet_data['id'] parsed_tweet['rt'] = tweet_data['retweet_count'] parsed_tweet['fav'] = tweet_data['favorite_count'] parsed_tweet['user'] = tweet_data['user']['id'] parsed_tweet['verified'] = tweet_data['user']['verified'] parsed_tweet['followers'] = tweet_data['user']['followers_count'] parsed_tweet['datetime'] = datetime.strptime(''.join([' '.join(i.split(' ')[1:]) for i in tweet_data['created_at'].split('+')]), '%b %d %H:%M:%S %Y') tweets.append(parsed_tweet) coin_num += 1 return(tweets, coin_id_conv, use_coins)
def nlu_insert(nlu_tweet_data, nlu, _psql_, _coin_id_conv, _use_coins): # tweet_data = pg_query(PSQL.client, 'select tweet_id, content from binance.twitter_tweets;') # nlus = pg_query(PSQL.client, 'select tweet_id from binance.twitter_nlu;') # nlus = list(set(nlus[0].values)) # tweet_idx = [i for i in tweet_data[0] if i not in nlus] # tweet_data = tweet_data.set_index(0).loc[tweet_idx].reset_index() # nlu_tweet_data, nlu, _psql_, _coin_id_conv, _use_coins = tweet_data[[0,5]], NLU, PSQL, coin_id_conv, use_coins # matches = {} for tweet_id, content in nlu_tweet_data[[0,5]].values: matches[tweet_id] = {} for symbol, name in _use_coins.items(): sym_name_match = [] if name.lower() in content: sym_name_match.append(name.lower()) if symbol.lower() in content: sym_name_match.append(symbol.lower()) if len(sym_name_match) > 0: matches[tweet_id][symbol] = sym_name_match nlu_idx = _det_current_nlu(_psql_) indexed_tweet_data = nlu_tweet_data.set_index(0) tweet_num = 0 total_tweets = len(matches.keys()) for k,v in matches.items(): progress(tweet_num, total_tweets) tweet_num += 1 targets = [] for vv in v.values(): targets += vv try: nlu_output = nlu.analyze(text=indexed_tweet_data[5].loc[k], features = Features(emotion=EmotionOptions(targets=targets), sentiment=SentimentOptions(targets=targets))).result except: continue coin_output = {} all_sentiment = nlu_output['sentiment']['targets'] try: all_emotion = nlu_output['emotion']['targets'] except: continue for coin, names in v.items(): coin_nlu = {} if len([i for i in all_sentiment if i['text'] in names]) == 0: continue if len([i for i in all_emotion if i['text'] in names]) == 0: continue coin_nlu['sentiment'] = np.mean([i['score'] for i in all_sentiment if i['text'] in names]) coin_nlu['sadness'] = np.mean([i['emotion']['sadness'] for i in all_emotion if i['text'] in names]) coin_nlu['joy'] = np.mean([i['emotion']['joy'] for i in all_emotion if i['text'] in names]) coin_nlu['fear'] = np.mean([i['emotion']['fear'] for i in all_emotion if i['text'] in names]) coin_nlu['disgust'] = np.mean([i['emotion']['disgust'] for i in all_emotion if i['text'] in names]) coin_nlu['anger'] = np.mean([i['emotion']['anger'] for i in all_emotion if i['text'] in names]) coin_output[coin] = coin_nlu for nlu_coin, nlu_values in coin_output.items(): script = "insert into binance.twitter_nlu (nlu_id, tweet_id, coin_id, sentiment, sadness, joy, fear, disgust, anger) VALUES (%i, %i, %i, %f, %f, %f, %f, %f, %f)" % (nlu_idx, k, _coin_id_conv[nlu_coin], nlu_values['sentiment'], nlu_values['sadness'], nlu_values['joy'], nlu_values['fear'], nlu_values['disgust'], nlu_values['anger']) pg_insert(_psql_.client, script) nlu_idx += 1
def commentExtract(videoId, count=-1):
    print("\nComments downloading")
    # Close the HTTP connection and increase the number of retry attempts
    page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key))
    while page_info.status_code != 200:
        if page_info.status_code != 429:
            print("Comments disabled")
            sys.exit()
        time.sleep(20)
        page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key))
    page_info = page_info.json()
    # test
    # print(page_info)
    comments = []
    co = 0
    for i in range(len(page_info['items'])):
        # Keep only comments with 3000 or more likes; change the threshold as needed
        if page_info['items'][i]['snippet']['topLevelComment']['snippet']['likeCount'] >= 3000:
            comments.append(page_info['items'][i]['snippet']['topLevelComment']['snippet']['textOriginal'])
            co += 1
        if co == count:
            PB.progress(co, count, cond=True)
            return comments
        PB.progress(co, count)
    # INFINITE SCROLLING
    while 'nextPageToken' in page_info:
        temp = page_info
        page_info = requests.get(YOUTUBE_IN_LINK.format(videoId=videoId, key=key, pageToken=page_info['nextPageToken']))
        while page_info.status_code != 200:
            time.sleep(20)
            page_info = requests.get(YOUTUBE_IN_LINK.format(videoId=videoId, key=key, pageToken=temp['nextPageToken']))
        page_info = page_info.json()
        for i in range(len(page_info['items'])):
            comments.append(page_info['items'][i]['snippet']['topLevelComment']['snippet']['textOriginal'])
            co += 1
            if co == count:
                PB.progress(co, count, cond=True)
                return comments
            PB.progress(co, count)
    PB.progress(count, count, cond=True)
    print()
    return comments
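# A guess at the module-level URL templates these extractors rely on, inferred from the
# literal commentThreads URL that appears later in this collection; the exact constants
# in the original project may differ.
YOUTUBE_LINK = ('https://www.googleapis.com/youtube/v3/commentThreads'
                '?part=snippet&maxResults=100&order=relevance'
                '&videoId={videoId}&key={key}')
YOUTUBE_IN_LINK = YOUTUBE_LINK + '&pageToken={pageToken}'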
def rsi(comp_idx, comp_name_conv):
    # comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV
    # nxt_id = 0
    nxt_id, comp_cur = det_cur_rsi(PSQL)
    total_stocks = len(comp_idx)
    cur_date = pg_query(PSQL.client, "SELECT max(date) FROM portfolio.rsi_day_14;")[0].values[0]
    for stock_num, (rh_id) in enumerate(comp_idx):
        progress(stock_num, total_stocks, status=comp_name_conv[rh_id])
        if rh_id in comp_cur.keys() and cur_date <= np.datetime64(comp_cur[rh_id]):
            continue
        comp_data = pg_query(PSQL.client, "SELECT date, close_price FROM portfolio.day_prices where rh_id = '%s';" % (rh_id))
        comp_data.rename(columns={0: 'date', 1: 'close_price'}, inplace=True)
        comp_data.sort_values('date', ascending=True, inplace=True)
        comp_data['diff'] = comp_data['close_price'].diff()
        comp_data['gains'] = comp_data['diff'].apply(lambda x: x if x > 0 else 0)
        comp_data['losses'] = comp_data['diff'].apply(lambda x: x if x < 0 else 0)
        comp_data.dropna(inplace=True)
        all_avg_gain = []
        all_avg_loss = []
        dates = []
        all_avg_gain.append(comp_data.iloc[:14]['gains'].mean())
        all_avg_loss.append(comp_data.iloc[:14]['losses'].mean())
        dates.append(comp_data.iloc[14]['date'])
        comp_data = comp_data.iloc[15:]
        for _date, cur_gain, cur_loss in comp_data[['date', 'gains', 'losses']].values:
            dates.append(_date)
            all_avg_gain.append((all_avg_gain[-1] * 13 + cur_gain) / 14)
            all_avg_loss.append((all_avg_loss[-1] * 13 + cur_loss) / 14)
        for date, gain, loss in zip(dates, all_avg_gain, all_avg_loss):
            if gain == 0:
                rsi = 0
            elif loss == 0:
                rsi = 100
            else:
                # Wilder's RSI: RSI = 100 - 100 / (1 + RS), with RS = avg gain / avg loss.
                # Losses are stored as negative numbers above, so take the absolute value.
                rsi = 100 - (100 / (1 + (gain / abs(loss))))
            if rh_id in comp_cur.keys() and comp_cur[rh_id] >= date:
                continue
            script = "INSERT INTO portfolio.rsi_day_14(rsi_day_14_id, rh_id, date, rsi) VALUES (%i, '%s', '%s', %.3f);" % (nxt_id, rh_id, datetime.strptime(str(date).split('.')[0], '%Y-%m-%d %H:%M:%S'), rsi)
            pg_insert(PSQL.client, script)
            nxt_id += 1
            comp_cur[rh_id] = date
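# Worked example of the RSI formula above, with made-up averages: avg_gain = 1.0 and
# avg_loss = 0.5 give RS = 2.0 and RSI = 100 - 100 / (1 + 2.0) ≈ 66.67.
avg_gain, avg_loss = 1.0, 0.5
rs = avg_gain / avg_loss
print(100 - 100 / (1 + rs))  # 66.66...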
def random_search(x, y, model, params, trials=25, verbose=False): # x, y, model, params, trials, verbose = x, y, pipe, param_dist, 5, False def _rand_shuffle(choices): """ Randomly select a hyperparameter for search """ selection = {} for key, value in choices.items(): selection[key] = random.choice(value) return (selection) search_scores = {} current_params = deepcopy(model).get_params() max_combinations = np.prod([len(i) for i in params.values()]) if trials > max_combinations: trials = max_combinations for i in range(trials): progress(i, trials) iter_params = _rand_shuffle(params) iter_name = '_'.join("{!s}={!r}".format(k, v) for (k, v) in iter_params.items()) while iter_name in search_scores.keys(): iter_params = _rand_shuffle(params) iter_name = '_'.join("{!s}={!r}".format(k, v) for (k, v) in iter_params.items()) search_scores[iter_name] = {} estimator = deepcopy(model) for key, value in iter_params.items(): estimator = estimator.set_params(**{key: value}) search_scores[iter_name][key] = value if verbose: try: print('%s: %.3f' % (key, value)) except TypeError: print('%s: %s' % (key, value)) if estimator.get_params() == current_params: if verbose: print('Parameters already in default.') iter_score = -np.inf else: try: iter_score = cross_validate(x, y, estimator, only_scores=True, verbose=verbose) except: iter_score = -np.inf if verbose: print('- score: %.5f' % (iter_score)) search_scores[iter_name]['score'] = iter_score progress(i + 1, trials) best_combination = max(search_scores, key=lambda x: search_scores[x]['score']) return_results = {'score': search_scores[best_combination]['score']} return_results['parameters'] = {} best_params = dict(search_scores[best_combination]) best_params.pop('score') return_results['parameters'] = best_params return (return_results)
def commentExtract(videoId, count=-1): print("Comments downloading") page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key)) while page_info.status_code != 200: # if page_info.status_code != 429: if page_info.status_code == 403: # return(page_info.status_code) # print ("Comments disabled") # sys.exit() return 'Comments disabled' time.sleep(20) page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key)) page_info = page_info.json() comments = [] co = 0 for i in range(len(page_info['items'])): comments.append(page_info['items'][i]['snippet']['topLevelComment'] ['snippet']['textOriginal']) co += 1 if co == count: PB.progress(co, count, cond=True) print() return comments PB.progress(co, count) # INFINTE SCROLLING while 'nextPageToken' in page_info: temp = page_info page_info = requests.get( YOUTUBE_IN_LINK.format(videoId=videoId, key=key, pageToken=page_info['nextPageToken'])) while page_info.status_code != 200: time.sleep(20) page_info = requests.get( YOUTUBE_IN_LINK.format(videoId=videoId, key=key, pageToken=temp['nextPageToken'])) page_info = page_info.json() for i in range(len(page_info['items'])): comments.append(page_info['items'][i]['snippet']['topLevelComment'] ['snippet']['textOriginal']) co += 1 if co == count: PB.progress(co, count, cond=True) print() return comments PB.progress(co, count) PB.progress(count, count, cond=True) print() return comments
def commentExtract(videoId, count=-1): print("Comments downloading") page_info = requests.get( 'https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&maxResults=100&order=relevance&videoId=tCXGJQYZ9JA&key=AIzaSyDVrVDtn4F2tqpQDg8DeQ7KnL2avbkZ8Pk' ) while page_info.status_code != 200: if page_info.status_code != 429: print("Comments disabled") sys.exit() time.sleep(20) page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key)) page_info = page_info.json() comments = [] co = 0 for i in range(len(page_info['items'])): comments.append(page_info['items'][i]['snippet']['topLevelComment'] ['snippet']['textOriginal']) co += 1 if co == count: PB.progress(co, count, cond=True) print() return comments PB.progress(co, count) # INFINTE SCROLLING while 'nextPageToken' in page_info: temp = page_info page_info = requests.get( YOUTUBE_IN_LINK.format(videoId=videoId, key=key, pageToken=page_info['nextPageToken'])) while page_info.status_code != 200: time.sleep(20) page_info = requests.get( YOUTUBE_IN_LINK.format(videoId=videoId, key=key, pageToken=temp['nextPageToken'])) page_info = page_info.json() for i in range(len(page_info['items'])): comments.append(page_info['items'][i]['snippet']['topLevelComment'] ['snippet']['textOriginal']) co += 1 if co == count: PB.progress(co, count, cond=True) print() return comments PB.progress(co, count) PB.progress(count, count, cond=True) print() return comments
def channelVideoStatisticsExtract(channel_id, multithreading=False, thread_num=32):
    try:
        title = getChannelName(channel_id)
    except Exception:
        title = channel_id
    if multithreading:
        with open('ChannelStatistics/' + title + '.csv', 'w', encoding='utf-8-sig', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
        queue = Queue(maxsize=thread_num)
        statisticsQueue = Queue(maxsize=128)
        getterThreads = []
        for i in range(thread_num):
            t = threading.Thread(target=multithreadingStatisticsExtract, args=(queue, statisticsQueue,))
            t.daemon = True
            getterThreads.append(t)
            t.start()
        writerThread = threading.Thread(target=multithreadingSaveStastics, args=(title, statisticsQueue,))
        writerThread.start()
        getVideoIdByChannelId(channel_id, queue, True)
        queue.join()
        statisticsQueue.join()
        for i in range(thread_num):
            queue.put('--thread-end--')
        statisticsQueue.put('--thread-end--')
        for i in range(thread_num):
            getterThreads[i].join()
        writerThread.join()
    else:
        print('Getting channel video ids...')
        video_ids = getVideoIdByChannelId(channel_id)
        print('Getting all statistics')
        PB.progress(0, 100)
        statistics = []
        curr_num = 0
        total_num = len(video_ids)
        for video_id in video_ids:
            statistics.append(statisticsExtract(video_id))
            curr_num += 1
            PB.progress(curr_num, total_num)
        print('Writing statistics...', end='')
        saveStastics(statistics, title)
        print('done')
def fetching(fake_product_reviews, original_product_reviews):
    print("Phase 1...\n")
    pb.progress(0.0, 100)
    with open("../dataset/fake_pro.txt", "r") as file:
        for line in file:
            fake_product_reviews.append(line)
    with open("../dataset/original_pro.txt", "r") as file:
        for line in file:
            original_product_reviews.append(line)
    time.sleep(1)
def pop_coins(psql, coins):
    # psql = PSQL
    print('Updating coins')
    nxt_coin, cur_coins, psql = _det_current_(psql, 'coins')
    total_coins = len(coins)
    for coin_num, (coin) in enumerate([str(i['name']) for i in coins]):
        progress(coin_num, total_coins, status = coin)
        if coin not in cur_coins:
            script = "insert into binance.%s (coin_id, symbol) VALUES (%i, '%s')" % ('coins', nxt_coin, coin)
            pg_insert(psql.client, script)
            cur_coins.add(coin)
            nxt_coin += 1
    print('\n')
def stoch_osc_k(comp_idx, comp_name_conv): # comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV # all_next_id, all_comp_cur = det_cur_ema(PSQL) cur_date = pg_query( PSQL.client, "SELECT max(date) FROM portfolio.sto_osc_14;")[0].values[0] total_stocks = len(comp_idx) nxt_id, comp_cur = det_cur_ma(PSQL) for stock_num, (rh_id) in enumerate(comp_idx): progress(stock_num, total_stocks, status=comp_name_conv[rh_id]) if rh_id in comp_cur.keys() and cur_date <= np.datetime64( comp_cur[rh_id]): continue # if rh_id in comp_cur.keys(): # missing_days = pg_query(PSQL.client, "SELECT date, close_price FROM portfolio.day_prices where rh_id = '%s' and date > '%s';" % (rh_id, all_comp_cur[period][rh_id])) # if len(missing_days) == 0: # continue # else: comp_data = pg_query( PSQL.client, "SELECT date, close_price FROM portfolio.day_prices where rh_id = '%s';" % (rh_id)) comp_data.rename(columns={0: 'date', 1: 'close_price'}, inplace=True) comp_data.sort_values('date', ascending=True, inplace=True) per_high = comp_data['close_price'].rolling(window=14).max() per_low = comp_data['close_price'].rolling(window=14).min() for date, current_close, lowest_low, highest_high in zip( comp_data.date.values, comp_data.close_price.values, per_low, per_high.values): if rh_id in comp_cur.keys() and comp_cur[rh_id] >= date: continue k = (current_close - lowest_low) / (highest_high - lowest_low) * 100 if k != k: continue script = "INSERT INTO portfolio.sto_osc_14(sto_osc_14_id, rh_id, date, k) VALUES (%i, '%s', '%s', %.2f);" % ( nxt_id, rh_id, datetime.strptime( str(date).split('.')[0], '%Y-%m-%dT%H:%M:%S'), k) pg_insert(PSQL.client, script) nxt_id += 1 comp_cur[rh_id] = date
def init_user_object_list(user_matrix):
    """
    Creates a list of user objects from a user data matrix extracted from the database.
    Use when refreshing the list of users to search through.
    """
    counter = 0
    user_object_list = []
    for user_row in user_matrix:
        temp_user_obj = User.User(user_row[:5], user_row[6:10], user_row[5],
                                  user_row[10:15], user_row[15:20], user_row[20:])
        user_object_list.append(temp_user_obj)
        # print progress. REMOVE FOR FINAL IMPLEMENTATION
        counter = counter + 1
        progress_bar.progress(counter, len(user_matrix))
    print "\n"
    return user_object_list
def cci(comp_idx, comp_name_conv): # comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV nxt_id, comp_cur = det_cur_cci(PSQL) total_stocks = len(comp_idx) cur_date = pg_query( PSQL.client, "SELECT max(date) FROM portfolio.cci_day_20;")[0].values[0] for stock_num, (rh_id) in enumerate(comp_idx): progress(stock_num, total_stocks, status=comp_name_conv[rh_id]) if rh_id in comp_cur.keys() and cur_date <= np.datetime64( comp_cur[rh_id]): continue comp_ma = pg_query( PSQL.client, "SELECT date, avg_price from portfolio.ma_day_20 where rh_id = '%s'" % (rh_id)) comp_ma.rename(columns={0: 'date', 1: 'ma'}, inplace=True) comp_ma.sort_values('date', ascending=True, inplace=True) comp_ma.set_index('date', inplace=True) comp_typ_price = pg_query( PSQL.client, "SELECT date, (high_price + low_price + close_price)/3 from portfolio.day_prices where rh_id = '%s'" % (rh_id)) comp_typ_price.rename(columns={0: 'date', 1: 'typical'}, inplace=True) comp_typ_price.sort_values('date', ascending=True, inplace=True) comp_typ_price.set_index('date', inplace=True) comp_cci = comp_typ_price.join(comp_ma) comp_cci['typ_avg'] = comp_cci['typical'].rolling(window=20).mean() comp_cci['abs_dev'] = abs(comp_cci['typical'] - comp_cci['typ_avg']) comp_cci['mean_dev'] = comp_cci['abs_dev'].rolling(window=20).mean() comp_cci['cci'] = (comp_cci['typical'] - comp_cci['ma']) / (comp_cci['mean_dev'] * .015) comp_cci.dropna(inplace=True) comp_cci.reset_index(inplace=True) for date, cci in comp_cci[['date', 'cci']].values: if rh_id in comp_cur.keys() and comp_cur[rh_id] >= date: continue if cci != cci or cci == np.inf or cci == -np.inf: continue script = "INSERT INTO portfolio.cci_day_20(cci_day_20_id, rh_id, date, cci) VALUES (%i, '%s', '%s', %.3f);" % ( nxt_id, rh_id, datetime.strptime( str(date).split('.')[0], '%Y-%m-%d %H:%M:%S'), cci) pg_insert(PSQL.client, script) nxt_id += 1 comp_cur[rh_id] = date
def dividends(full_update = False): print(' ~~ Dividends ~~ ') PSQL = db_connection('psql') id_sym = pg_query(PSQL.client, 'select rh_id, rh_sym from portfolio.stocks') current, next_id = det_cur_divs(PSQL) total_stocks = len(id_sym) for stock_num, (idx, sym) in enumerate(id_sym.values): progress(stock_num, total_stocks, status = sym) if not full_update and idx in current.keys(): continue url = 'https://api.iextrading.com/1.0/stock/%s/dividends/5y' % (sym) req = requests.request('GET', url) if req.status_code == 404: continue data = req.json() if len(data) == 0: continue for div in reversed(data): ex_date = datetime.strptime(div['exDate'], '%Y-%m-%d') if idx in current.keys() and ex_date <= current[idx]: continue if div['amount'] == '': continue amount = float(div['amount']) if div['paymentDate'] == '': continue payment_date = datetime.strptime(div['paymentDate'], '%Y-%m-%d') record_date = datetime.strptime(div['recordDate'], '%Y-%m-%d') if div['declaredDate'] == '': declared_date = 'null' else: declared_date = "'"+str(datetime.strptime(div['declaredDate'], '%Y-%m-%d'))+"'" script = "INSERT INTO portfolio.dividends(\ div_id, rh_id, ex_date, payment_date, record_date, declared_date, amount) \ VALUES (%i, '%s', '%s', '%s', '%s', %s, %.2f);" % (next_id, idx, ex_date, payment_date, record_date, declared_date, amount) pg_insert(PSQL.client, script) next_id += 1 current[idx] = ex_date ex_date = None payment_date = None record_date = None declared_date = None amount = None
def pop_markets(psql, prices):
    # psql = PSQL
    print('Updating markets')
    _, cur_coins, psql = _det_current_(psql, 'coins')
    nxt_market, cur_markets, psql = _det_current_(psql, 'markets')
    total_markets = len(prices)
    for market_num, (market) in enumerate([str(i['symbol']) for i in prices]):
        progress(market_num, total_markets, status = market)
        if market not in cur_markets:
            formed_market = validate_pair(market, cur_coins)
            if formed_market:
                script = "insert into binance.%s (market_id, symbol, sell_coin, buy_coin) VALUES (%i, '%s', '%s', '%s')" % ('markets', nxt_market, market, formed_market['sell_coin'], formed_market['buy_coin'])
                pg_insert(psql.client, script)
                cur_markets.add(market)
                nxt_market += 1
    print('\n')
def obv_roc(comp_idx, comp_name_conv): # comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV cur_date = pg_query(PSQL.client, "SELECT max(date) FROM portfolio.adl_day;")[0].values[0] nxt_id, comp_cur = det_cur_obv(PSQL) total_stocks = len(comp_idx) for stock_num, (rh_id) in enumerate(comp_idx): progress(stock_num, total_stocks, status = comp_name_conv[rh_id]) if rh_id in comp_cur.keys() and cur_date <= np.datetime64(comp_cur[rh_id]): continue comp_data = pg_query(PSQL.client, "SELECT date, close_price, volume FROM portfolio.day_prices where rh_id = '%s';" % (rh_id)) comp_data.rename(columns = {0:'date', 1:'close_price', 2:'volume'}, inplace = True) comp_data.sort_values('date', ascending = True, inplace = True) last_close = comp_data.iloc[0]['close_price'] obvs = [comp_data.iloc[0]['volume']] dates = [comp_data.iloc[0]['date']] for date, close, volume in comp_data.iloc[1:].values: if close > last_close: obvs.append(obvs[-1] + volume) elif close < last_close: obvs.append(obvs[-1] - volume) elif close == last_close: obvs.append(obvs[-1]) dates.append(date) last_close = close obv = pd.DataFrame([dates, obvs]).T obv.rename(columns = {0: 'date', 1: 'obv'}, inplace = True) obv['ra'] = obv['obv'].rolling(window = 14).mean() obv['obv_roc'] = (obv['obv'] - obv['ra'].shift()) / obv['ra'].shift() obv.dropna(inplace = True) for date, obv in obv[['date', 'obv_roc']].values: if rh_id in comp_cur.keys() and date <= np.datetime64(comp_cur[rh_id]): continue if obv != obv: continue if obv == np.inf or obv == -np.inf: continue script = "INSERT INTO portfolio.obv_14_roc(obv_14_roc_id, rh_id, date, obv_roc) VALUES (%i, '%s', '%s', %.4f);" % (nxt_id, rh_id, datetime.strptime(str(date).split('.')[0], '%Y-%m-%d %H:%M:%S'), obv) pg_insert(PSQL.client, script) nxt_id += 1 comp_cur[rh_id] = date
def day_prices(): print(' ~~ Full Day Prices ~~ ') PSQL = db_connection('psql') current, next_id = det_cur_day_prices(PSQL, 'day') id_sym = pg_query(PSQL.client, 'select rh_id, rh_sym from portfolio.stocks') total_stocks = len(id_sym) trader = rs() login_data = trader.login() for stock_num, (idx, sym) in enumerate(id_sym.values): # if idx == '1d4d0780-ba27-4adc-ab12-0c3062fdf365': # asdfasdf progress(stock_num, total_stocks, status = sym) symbols = helper.inputs_to_set(sym) url = urls.historicals() payload = { 'symbols' : ','.join(symbols), 'interval' : 'day', 'span' : '5year', 'bounds' : 'regular'} data = trader.request_get(url,'results',payload) if data == [None]: continue for day in data[0]['historicals']: beg_date = datetime.strptime(day['begins_at'].replace('Z', '').replace('T', ' '), '%Y-%m-%d %H:%M:%S') if idx in current.keys() and beg_date <= current[idx]: continue open_price = float(day['open_price']) close_price = float(day['close_price']) high_price = float(day['high_price']) low_price = float(day['low_price']) volume = int(day['volume']) script = "INSERT INTO portfolio.day_prices(day_price_id, rh_id, date, open_price, close_price, high_price, low_price, volume) VALUES ('%s', '%s', '%s', %.2f, %.2f, %.2f, %.2f, %i);" % (next_id, idx, beg_date, open_price, close_price, high_price, low_price, volume) pg_insert(PSQL.client, script) next_id += 1 open_price = None close_price = None high_price = None low_price = None volume = None trader.logout()
def LEASE_NTC_INIT():
    current_page = get_web_page(LEASE_URL, urlJumpIp)  # returns a dict of dict of list of dict
    total_rows = get_total_rows(current_page)
    page_count = 0
    row_data = []
    while page_count <= total_rows:
        data = get_info(current_page)
        row_data += data
        page_count += pageRow
        current_page = get_web_page(LEASE_URL + "&firstRow=" + str(page_count) + "&totalRows=" + str(total_rows), urlJumpIp)
        progress(page_count, total_rows, __file__)
    save(row_data)
    print(str(__file__) + " complete")
def judgement(inputt, output, arr, brr):
    co = 0
    print("Network testing:")
    for i in range(len(inputt)):
        st = np.resize(inputt[i], (1, len(inputt[i])))
        st = st / 255
        mid = nonlin(np.dot(st, arr))
        end = nonlin(np.dot(mid, brr))
        ind = np.argmax(end)
        if (ind == output[i]):
            co += 1
        if (i % 1000 == 0):
            prb.progress(i, len(inputt))
    prb.progress(len(inputt), len(inputt), cond=True)
    print('\n')
    print("Accuracy : ", ((co / len(inputt)) * 100), '%')
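# The nonlin() activation used by judgement() and training() is not shown in this
# collection; a sigmoid of this shape is a reasonable guess, but the original helper
# may differ (for instance, it may also return the derivative behind a flag).
import numpy as np

def nonlin(x):
    return 1.0 / (1.0 + np.exp(-x))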
def _participation(self, chat_id, message):
    output = ""
    for key, value in sorted(self._flood[chat_id].get_all().items()):
        output += self._get_name(key) + ": "
        if message == self._commands[12]:
            output += str(value) + '\n'
        output += progress(value, self._flood_amount[chat_id]) + '\n'
    if output == "":
        return "Пока всё тихо..."  # "All quiet so far..."
    return output
def training(inputt, output):
    np.random.seed(1)
    # Weight Holder. IMPORTANT/TRAINEE/FUTURE
    arr = np.random.uniform(-1.0, 1.0, (len(inputt[0]), 20))  # transpose
    brr = np.random.uniform(-1.0, 1.0, (20, 10))  # same, duh!
    print("Network training:")
    for i in range(len(inputt)):
        a1 = np.resize(inputt[i], (1, len(inputt[i])))
        arr, brr = func(a1, output[i], arr, brr)
        if (i % 1000 == 0):
            prb.progress(i, len(inputt))
    prb.progress(len(inputt), len(inputt), cond=True)
    print()
    return arr, brr
def moving_average(comp_idx, comp_name_conv): # comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV cur_date = pg_query( PSQL.client, "SELECT max(date) FROM portfolio.day_prices;")[0].values[0] all_next_id, all_comp_cur = det_cur_ma(PSQL) total_stocks = len(comp_idx) for stock_num, (rh_id) in enumerate(comp_idx): progress(stock_num, total_stocks, status=comp_name_conv[rh_id]) for period in [200, 100, 50, 20, 10]: if rh_id in all_comp_cur[period].keys( ) and cur_date <= np.datetime64(all_comp_cur[period][rh_id]): continue comp_data = pg_query( PSQL.client, "SELECT date, close_price FROM portfolio.day_prices where rh_id = '%s';" % (rh_id)) comp_data.rename(columns={ 0: 'date', 1: 'close_price' }, inplace=True) comp_data.sort_values('date', ascending=True) date_idx = comp_data['date'] dma = calc_mov_avg(comp_data, period) for date, avg_price in zip(date_idx.values, dma.values): if rh_id in all_comp_cur[period].keys( ) and date <= np.datetime64(all_comp_cur[period][rh_id]): continue if avg_price != avg_price: continue script = "INSERT INTO portfolio.ma_day_%s(ma_day_%s_id, rh_id, date, period, avg_price) VALUES (%i, '%s', '%s', %i, %.2f);" % ( period, period, all_next_id[period], rh_id, datetime.strptime( str(date).split('.')[0], '%Y-%m-%dT%H:%M:%S'), period, avg_price) pg_insert(PSQL.client, script) all_next_id[period] += 1 all_comp_cur[period][rh_id] = date
def _gen_indices(): use_kws, kw_weights, use_conc, conc_weights, use_cat, cat_weights = find_weights( ) _kw_index = {i: [] for i in use_kws} _conc_index = {i: [] for i in use_conc} _cat_index = {i: [] for i in use_cat} total_docs = len(data) for doc_num, (idx, v) in enumerate(data.items()): for doc_kw in v['keywords']: _kw_index[doc_kw['text'].replace(' ', '_').lower()] += [{ 'doc_id': idx, 'score': doc_kw['relevance'] * kw_weights[doc_kw['text'].replace(' ', '_').lower()] }] for doc_conc in v['concepts']: _conc_index[doc_conc['text'].replace(' ', '_').lower()] += [{ 'doc_id': idx, 'score': doc_conc['relevance'] * conc_weights[doc_conc['text'].replace(' ', '_').lower()] }] for doc_cat in v['categories']: _cat_index[doc_cat['label'].replace(' ', '_').lower()] += [{ 'doc_id': idx, 'score': doc_cat['score'] * cat_weights[doc_cat['label'].replace(' ', '_').lower()] }] progress(doc_num + 1, total_docs) print('\n') return (_kw_index, _conc_index, _cat_index)
def pop_prices(psql, time_conv, coins):
    # psql, time_conv, coins = PSQL, TIME_CONV, COINS
    print('Updating prices')
    all_prices = pg_query(psql.client, 'select pr_symbol, max(pr_time_id) from binance.prices group by pr_symbol')
    price_conv = {k:v for k,v in all_prices.values}
    total_prices = len(coins)
    for price_num, (coin) in enumerate([i['name'] for i in coins]):
        progress(price_num, total_prices, status = coin)
        for time in list(time_conv.keys()):
            if coin in price_conv.keys():
                if price_conv[coin] >= time_conv[time]:
                    break
            try:
                price_data = requests.get('https://min-api.cryptocompare.com/data/pricehistorical?fsym=%s&tsyms=USD&ts=%s' % (coin, time.strftime("%s"))).json()
            except:
                continue
            script = "insert into binance.prices (pr_symbol, pr_time_id, usd) VALUES ('%s', %i, %s);" % (coin, time_conv[time], price_data[coin]['USD'])
            pg_insert(psql.client, script)
    print('\n')
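# Note on the pricehistorical call above: strftime("%s") is a platform-specific glibc
# extension and is not portable (it fails on Windows). A minimal sketch of a portable
# way to get the Unix timestamp the API expects -- assuming the stored times are UTC,
# which the original code does not state:
from datetime import datetime, timezone

ts = int(datetime(2018, 6, 1, 12, 0, tzinfo=timezone.utc).timestamp())
print(ts)  # 1527854400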
def test_scaler(clf, prev_score, x, y, verbose=False, prog=True): # clf, prev_score, x, y, verbose, prog = log_clf, log_checkpoint_score, X, Y, False, True if prog: print('Searching for best scaler.') scores = {} model = deepcopy(clf) total_scales = 3 cur_scale = 0 for scale, name in zip( [StandardScaler(), MinMaxScaler(), RobustScaler()], ['Standard', 'MinMax', 'Robust']): if not verbose and prog: progress(cur_scale, total_scales) if model.steps[1][1].__class__ == scale.__class__: if verbose: print('%s Already Included' % (name)) print('Score: %.5f' % (prev_score)) cur_scale += 1 continue scale_score = cross_validate(x, y, model.set_params(**{'scale': scale}), only_scores=True, verbose=verbose) if verbose: print('%s Score: %.5f' % (name, scale_score)) scores[name] = {'scale': scale} scores[name]['score'] = scale_score cur_scale += 1 if not verbose and prog: progress(cur_scale, total_scales) best_scale = max(scores, key=lambda x: scores[x]['score']) if scores[best_scale]['score'] > prev_score: if prog: print('Using %s.' % (best_scale)) return (model.set_params(**{'scale': scores[best_scale]['scale']}), scores[best_scale]['score']) else: if prog: print('Keeping original.') return (clf, prev_score)
def channelCommentExtract(channel_id, multithreading=False, thread_num=16): if multithreading: global queue queue = Queue(maxsize=thread_num) for i in range(thread_num): t = threading.Thread(target=multithreadingCommentExtract) t.daemon = True t.start() getVideoIdByChannelId(channel_id, queue, True) queue.join() else: print('Getting channel video ids...') video_ids = getVideoIdByChannelId(channel_id) print('Getting all comments') PB.progress(0, 100) comments = [] curr_num = 0 total_num = len(video_ids) for video_id in video_ids: comments.append(commentExtract(video_id)) curr_num += 1 PB.progress(curr_num, total_num) PB.progress(curr_num, total_num) pass
def main(): """The main function.""" parser = OptionParser(version='%prog v' + __version__) parser.add_option('-c', '--config', default='config.ini', help='Location of config file (default: %default)', metavar='FILE') parser.add_option('-o', '--output', default='simplenotebak.json.txt', help='Output file name (default: %default)', metavar='FILE') (options, args) = parser.parse_args() # set script's path and add '/' to end script_path = os.path.abspath(os.path.dirname(sys.argv[0])) + '/' if args: print 'debug: you wanted to run command: ' + args[0] config = SafeConfigParser() # can pass multiple files to config.read but it merges them, which we don't want if not config.read(options.config): # could not read file, try the script's path if not config.read(script_path + options.config): # Still can't find it, error out print 'Could not read any config file' sys.exit(1) email = config.get('simplenote', 'email') password = config.get('simplenote', 'password') sn = Simplenote(email, password) if not sn.login(): print 'ERROR:', sn.last_error sys.exit(1) index = sn.full_index() #index = sn.index(5) if sn.has_error: print 'ERROR:', sn.last_error sys.exit(1) #print '- index -' #pp = pprint.PrettyPrinter(indent=4) #pp.pprint(index) print 'number of notes:', str(len(index)) notecount = float(len(index)) #print '- data -' notes = [] i = 0 #for note_meta in index['data']: for note_meta in index: note = sn.note(note_meta['key']) if sn.has_error: print 'ERROR:', sn.last_error sys.exit(1) notes.append(note) #pp.pprint(note) i += 1 pb.progress(50, math.floor(float(i) / notecount * 100.0)) print 'Number of api calls:', str(sn.api_count) # xml format #xmlnotes = '' #for note in notes: # if 'xml' in locals(): # xml.append(dict_to_xml(note)) # else: # xml = dict_to_xml(note) #xmlnotes = '<?xml version="1.0" encoding="UTF-8"?>' + "\n" #xmlnotes += ET.tostring(xml, encoding="UTF-8") #print xmlnotes # JSON format jsonnotes = [] i = 0 for note in notes: if note['deleted'] == 1: continue json_note_tmp = {'modifydate': format_date(float(note['modifydate']))} json_note_tmp.update({'createdate': format_date(float(note['createdate']))}) json_note_tmp.update({'tags': note['tags']}) json_note_tmp.update({'systemtags': note['systemtags']}) json_note_tmp.update({'content': note['content']}) json_note_tmp.update({'key': note['key']}) jsonnotes.append(json_note_tmp) #print json.dumps(jsonnotes) fh = open(options.output, 'w') fh.write(json.dumps(jsonnotes)) fh.close()
def insert_band(raster, layer, outName, bandIndex=None, substitute=False, \ bandNames=None, outFormat=None, createOptions=None): """ Insert raster layers into a multiband image at desired positions. Use: raster (string): the name of the output file (full path and file extension). layer (string / list): EITHER the name of the input raster containing the layer to be inserted (full path and file extension) OR a list containing all (single-band) rasters to be inserted. outName (string): the name of the output file (full path and file extension). bandIndex (integer / list): EITHER a single value indicating the position for the layer OR a list of integers containing all positions. Per default, the layer(s) is/are inserted at the end. substitute (boolean): if set "True", the layers to be inserted will substitute the existing one at their resprective positions. Defaults to "False", which means that the new layers will be inserted at the desired position and all other bands will be appended afterwards. Substitution will not work if "bandIndex" ist not set! bandNames (list): a list of band names for the inserted layers. Defaults to the names of the input files. outFormat (string): the desired format of the output file as provided by the GDAL raster formats (see: http://www.gdal.org/formats_list.html). Defaults to the format of the input raster. createOptions (list): a list of strings, containing advanced raster creation options such as band interleave. Example: createOptions=['interleave=bil'] """ gdal.AllRegister() # get basic information and create output raster: ds = gdal.Open(raster, GA_ReadOnly) bands = ds.RasterCount band = ds.GetRasterBand(1) x = ds.RasterXSize y = ds.RasterYSize proj = ds.GetProjection() transform = ds.GetGeoTransform() meta = ds.GetMetadata_Dict() if isinstance(layer, basestring) == True: layer = list(layer) if bandIndex != None: if isinstance(bandIndex, list) == False: newBands = list(bandIndex) else: newBands = bandIndex else: newBands = [i for i in xrange(bands + 1, bands + (len(layer) + 1))] if bandNames != None: if isinstance(bandNames, list) == False: bandNames = list(bandNames) else: bandNames = bandNames if outFormat == None: outFormat = ds.GetDriver().GetDescription() else: outFormat = outFormat driver = gdal.GetDriverByName(outFormat) if createOptions == None: ds_out = driver.Create(outName, x, y, bands + len(newBands), band.DataType) else: ds_out = driver.Create(outName, x, y, bands + len(newBands), band.DataType, \ createOptions) ds_out.SetProjection(proj) ds_out.SetGeoTransform(transform) band = None if bandIndex != None: lyr_index = 1 band_index = 1 # insert single layers at desired positions for b in xrange(1, bands + 1): # progress bar: try: pr.progress(b, xrange(1, bands + 1)) except: pass if b in newBands: insert = gdal.Open(layer[lyr_index - 1], GA_ReadOnly) band = insert.GetRasterBand(1) dtype = band.DataType nodata = band.GetNoDataValue() data = band.ReadRaster(0, 0, x, y, x, y, dtype) band_out = ds_out.GetRasterBand(band_index) # create new band names (either from original image or from user input): if bandNames == None: name = string.join(['Band', str(band_index)], sep='_') desc = band.GetDescription() if len(desc) == 0: desc = layer[lyr_index - 1] meta[name] = desc band_out.SetDescription(band.GetDescription()) else: name = string.join(['Band', str(band_index)], sep='_') meta[name] = bandNames[lyr_index - 1] band_out.SetDescription(bandNames[lyr_index - 1]) band_out.SetNoDataValue(double(nodata)) band_out.WriteRaster(0, 0, x, y, data, x, y, dtype) 
lyr_index += 1 if substitute == False: band_index += 1 insert = None # then next band of original image: band = ds.GetRasterBand(b) dtype = band.DataType nodata = band.GetNoDataValue() data = band.ReadRaster(0, 0, x, y, x, y, dtype) band_out = ds_out.GetRasterBand(b) name = string.join(['Band', str(band_index)], sep='_') meta[name] = band.GetDescription() band_out.SetDescription(band.GetDescription()) band_out.SetNoDataValue(double(nodata)) band_out.WriteRaster(0, 0, x, y, data, x, y, dtype) band_index += 1 band_out = None band = None else: band = ds.GetRasterBand(b) dtype = band.DataType nodata = band.GetNoDataValue() data = band.ReadRaster(0, 0, x, y, x, y, dtype) band_out = ds_out.GetRasterBand(b) name = string.join(['Band', str(band_index)], sep='_') meta[name] = band.GetDescription() band_out.SetDescription(band.GetDescription()) band_out.SetNoDataValue(double(nodata)) band_out.WriteRaster(0, 0, x, y, data, x, y, dtype) band_index += 1 band_out = None band = None else: if substitute == True: raise ValueError("Option 'substitute' will only work with 'bandIndex' set!") lyr_index = 1 band_index = 1 # insert single layers at the end for b in xrange(1, bands + 1): band = ds.GetRasterBand(b) dtype = band.DataType nodata = band.GetNoDataValue() data = band.ReadRaster(0, 0, x, y, x, y, dtype) band_out = ds_out.GetRasterBand(b) name = string.join(['Band', str(band_index)], sep='_') meta[name] = band.GetDescription() band_out.SetDescription(band.GetDescription()) band_out.SetNoDataValue(double(nodata)) band_out.WriteRaster(0, 0, x, y, data, x, y, dtype) band_index += 1 band_out = None band = None for i in xrange(0, len(newBands)): insert = gdal.Open(layer[i], GA_ReadOnly) band = insert.GetRasterBand(1) dtype = band.DataType nodata = band.GetNoDataValue() data = band.ReadRaster(0, 0, x, y, x, y, dtype) band_out = ds_out.GetRasterBand(band_index) # create new band names (either from original image or from user input): if bandNames == None: name = string.join(['Band', str(band_index)], sep='_') desc = band.GetDescription() if len(desc) == 0: desc = layer[lyr_index - 1] meta[name] = desc band_out.SetDescription(band.GetDescription()) else: name = string.join(['Band', str(band_index)], sep='_') meta[name] = bandNames[lyr_index - 1] band_out.SetDescription(bandNames[lyr_index - 1]) band_out.SetNoDataValue(double(nodata)) band_out.WriteRaster(0, 0, x, y, data, x, y, dtype) lyr_index += 1 band_index += 1 ds_out.SetMetadata(meta) ds_out = None ds = None
def stack_images(images, outfile, bands=None, of='GTiff', co=None, noData=0, bandNames=None): """ Create a layerstack from all files within a directory. Use: images (list): a list of strings containing the full path and file extension for the single raster files which shall be stacked together. Will be stacked in the order as given by this list. outfile (string): the name of the output file (full path and file extension). bands (list): a list containing the desired band numbers of the input images which shall be used for stacking. By this, single bands out of multiband images can be used. Defaults to "None", which means that the first band will be used. If more than one band from the same multiband image shall be used, the filename must be given again in the "images"-list. of (string): the desired format of the output file as provided by the GDAL raster formats (see: http://www.gdal.org/formats_list.html). Defaults to 'GTiFF'. co (list): a list of strings, containing advanced raster creation options such as band interleave. Example: createOptions=['interleave=bil'] noData (int / float): the no-data value to be set. Defaults to the no-data value of the first input file. bandNames (list): a list of band names for the output file. Defaults to the names of the input files. """ gdal.AllRegister() # get basic information and create output raster: ds = gdal.Open(images[0], GA_ReadOnly) band = ds.GetRasterBand(1) proj = ds.GetProjection() transform = ds.GetGeoTransform() x = ds.RasterXSize y = ds.RasterYSize dtype = band.DataType # nodata if noData == None: nodata = band.GetNoDataValue() else: nodata = noData driver = gdal.GetDriverByName(of) # check for create options if co == None: ds_out = driver.Create(outfile, x, y, len(images), dtype) else: ds_out = driver.Create(outfile, x, y, len(images), dtype, co) # set projection ds_out.SetProjection(proj) ds_out.SetGeoTransform(transform) band = None ds = None # loop through all files and stack them: band_index = 1 for name in images: # progress bar: try: pr.progress(name, images) except: pass # open imput image ds = gdal.Open(name, GA_ReadOnly) # check if specific bands are desired if bands == None: band = ds.GetRasterBand(1) else: band = ds.GetRasterBand(bands[band_index-1]) # set data type dtype = band.DataType # read data data = band.ReadRaster(0, 0, x, y, x, y, dtype) band_out = ds_out.GetRasterBand(band_index) # create new band names (either from original image or from user input): if bandNames == None: band_out.SetDescription(band.GetDescription()) else: band_out.SetDescription(bandNames[band_index - 1]) band_out.SetNoDataValue(nodata) band_out.WriteRaster(0, 0, x, y, data, x, y, dtype) band_index += 1 band = None ds = None ds_out = None
def ftp_download(url, user='', pw='', localPath=os.getcwd(), pattern=None): """ Download files from a ftp-server. Can handle one level of subfolders. Use: url (string): the complete url of the ftp-server containing the desired files. user (string): 'Username' for server login. pw (string): 'Password' for server login. localPath (string): the local directory, to which the files shall be downloaded. Defaults to the current working directory. pattern (list): a list of strings containing the pattern of characters to look for in the file names as a list. May be useful, if there are many files from which only a selection shall be taken. Be aware that the list is taken as "either or", not as "and"! Examples: pattern=['.txt'] (if all txt-files are desired) pattern=['_test_', '_demo'] (if all desired files contain either "_test_" or "_demo") """ if url.__contains__('ftp://'): url = url.split('//')[1] # set up connection: ftp = ftplib.FTP(url.split('/')[0]) ftp.connect() ftp.login(user, pw) # switch directory on server: root = url.lstrip(url.split('/')[0]) # if root still contains leading "/", remove it if root[0] == "/": root = root[1:len(root)] ftp.cwd(root) # list all files and/or directories: listing = [] ftp.retrlines('NLST', listing.append) print 'Downloading data...' # loop through the current directory and get the files: for l in listing: # progress bar try: pr.progress(l, listing) except: pass # check if l is file or directory if os.path.splitext(l)[1] != '': # file # check for desired pattern: if pattern == None: # download files: local_filename = os.path.join(localPath, l) lf = open(local_filename, 'wb') ftp.retrbinary('RETR ' + l, lf.write) lf.close() else: for p in pattern: # download files: if l.__contains__(str(p)): local_filename = os.path.join(localPath, l) lf = open(local_filename, 'wb') ftp.retrbinary('RETR ' + l, lf.write) lf.close() else: # directory or file without extension # check if l is a file without extension try: ftp.cwd(l) # directory # get file names: files = [] ftp.retrlines('NLST', files.append) # check for desired pattern: if pattern == None: for f in files: # download files: local_filename = os.path.join(localPath, f) lf = open(local_filename, 'wb') ftp.retrbinary('RETR ' + f, lf.write) lf.close() else: for p in pattern: for f in files: # download files: if f.__contains__(str(p)): local_filename = os.path.join(localPath, f) lf = open(local_filename, 'wb') ftp.retrbinary('RETR ' + f, lf.write) lf.close() # go back up one level ftp.cwd('/..') ftp.cwd(root) except: # file with no extension if pattern == None: # download local_filename = os.path.join(localPath, l) lf = open(local_filename, 'wb') ftp.retrbinary('RETR' + l, lf.write) lf.close() else: for p in pattern: if l.__contains__(str(p)): # download local_filename = os.path.join(localPath, l) lf = open(local_filename, 'wb') ftp.retrbinary('RETR' + l, lf.write) lf.close() ftp.close()
def apply_mask(image, maskImage, outName, outPath=None, of='Gtiff', co=None, keepWarp=False): """ Apply a mask to a (multiband) image. The mask will be multiplied with the input image. Both images have to cover the same area, but may differ in resolution. Use: image (string): the image file (full path and file extension). maskImage (string): the image file which shall be used as mask (full path and file extension). outName (string): the name of the output file (with file extension). outpath (string): the directory to which the output file will be written. Defaults to the parent directory of the input image. of (string): the desired format of the output file as provided by the GDAL raster formats (see: http://www.gdal.org/formats_list.html). Defaults to 'GTiFF'. co (list): a list of strings, containing advanced raster creation options such as band interleave. Example: co=['interleave=bil'] keepWarp (boolean): if image and maskImage have different projection, resolution or extent, the mask image will be warped using gdalwarp. If keepWarp is True, this warped mask image will not be deleted. Defaults to False (will be deleted). """ # check if outfile exists and delete it: path = os.path.dirname(image) if outPath == None: pass else: path = outPath if outName in os.listdir(path): print 'Outfile already exists, will be overwritten!' os.remove(os.path.join(path, outName)) gdal.AllRegister() driver = gdal.GetDriverByName(of) # get information of input raster ds = gdal.Open(image, GA_ReadOnly) cols = ds.RasterXSize rows = ds.RasterYSize bands = ds.RasterCount img_proj = ds.GetProjection() img_geo = ds.GetGeoTransform() # get information of mask mask_ds = gdal.Open(maskImage, GA_ReadOnly) mask_proj = mask_ds.GetProjection() mask_geo = mask_ds.GetGeoTransform() # check if inout image and mask share the same projection, resolution and extent if img_proj == mask_proj and img_geo == mask_geo: pass else: mask_ds = None # warp mask to match input image print 'Mask image does not match input image. Transforming mask now...' 
# get bounding box of input image minX = img_geo[0] maxY = img_geo[3] pixSizeX = img_geo[1] pixSizeY = img_geo[5] maxX = minX + (cols * pixSizeX) minY = maxY + (rows * pixSizeY) ## set up gdalwarp command tempfile = os.path.join(path, os.path.splitext(maskImage)[0] + '_warped' + os.path.splitext(maskImage)[1]) if os._exists(tempfile): os.remove(tempfile) of = '-of GTiff' bb = '-te %s %s %s %s '%(minX, minY, maxX, maxY) res = '-tr %s %s '%(pixSizeX, pixSizeY) epsg = int(img_proj.split('''AUTHORITY["EPSG","''')[-1].strip('''"]]''')) proj = ''' -t_srs epsg:%s ''' %(epsg) warpcmd = string.join(['gdalwarp', of, bb, res, proj, maskImage, tempfile], sep=' ') # execute os.system(warpcmd) # open new file maskImage = tempfile mask_ds = gdal.Open(maskImage, GA_ReadOnly) # open mask image: maskBand = mask_ds.GetRasterBand(1) mask = maskBand.ReadAsArray(0, 0, mask_ds.RasterXSize, mask_ds.RasterYSize) mask_ds = None # create (empty) output image: if co == None: ds_out = driver.Create(os.path.join(path, outName), \ ds.RasterXSize, ds.RasterYSize, bands, \ ds.GetRasterBand(1).DataType) else: ds_out = driver.Create(os.path.join(path, outName), \ ds.RasterXSize, ds.RasterYSize, bands, \ ds.GetRasterBand(1).DataType, co) ds_out.SetProjection(ds.GetProjection()) ds_out.SetGeoTransform(ds.GetGeoTransform()) ds_out.SetMetadata(ds.GetMetadata()) # apply mask to all bands: for b in xrange(1, bands + 1): band = ds.GetRasterBand(b) data = band.ReadAsArray(0, 0, cols, rows) data_out = mask * data # write masked band: b_out = ds_out.GetRasterBand(b) b_out.WriteArray(data_out) b_out = None try: pr.progress(b, xrange(1, bands + 1)) except: pass # clean up ds_out = None ds = None if keepWarp == False: os.remove(tempfile)