def get_host_metrics(connect, host):
    metrics = {}
    datetime = []
    cpu = []
    memory = []
    process = []
    cursor = connect.cursor()
    sql = "select timestamp,week_content from pm_metrics_points_week " \
          "where host_id='%s' and createtime > now() + '-30 day'" % (host['ip'])
    cursor.execute(sql)
    result = cursor.fetchall()
    for row in result:
        datetime.append(int(row[0]))
        content = JSON.loads(row[1])
        for record in content:
            kpiId = record['kpiId']
            try:
                if kpiId == '9001':
                    cpu.append(float(record['value']))
                elif kpiId == '9002':
                    memory.append(float(record['value']))
                elif kpiId == '9010':
                    process.append(int(record['value']))
            except Exception as e:
                pass
    metrics['datetime'] = datetime
    metrics['cpu'] = cpu
    metrics['memory'] = memory
    metrics['process'] = process
    cursor.close()
    avg_cpu = 0
    max_cpu = 0
    if len(cpu) > 0:
        avg_cpu = round(numpy.mean(cpu), 1)
        max_cpu = round(numpy.max(cpu), 1)
    avg_memory = 0
    max_memory = 0
    if len(memory) > 0:
        avg_memory = round(numpy.mean(memory), 1)
        max_memory = round(numpy.max(memory), 1)
    avg_process = 0
    if len(process) > 0:
        avg_process = int(numpy.mean(process))
    return metrics, avg_cpu, avg_memory, avg_process, max_cpu, max_memory
def get_datetime():
    ask = ["Year", "Month", "Day", "Hour (24-Hour)", "Minute"]  # prompts used to fill in the input statement
    datetime = []  # collected date/time components
    print("Enter the starting date and/or time for your countdown...")
    for word in ask:
        ans = input("%s: " % word)
        datetime.append(ans)
    # DEBUGGING
    debug(datetime)
    return datetime
def getBalanceHistory(coin_fee):
    txid, unix, datetime, balance = [], [], [], []
    for i in range(len(coin_fee.txid)):
        txid.append(coin_fee.txid[i])
    txid = np.sort(txid)
    for i in range(len(txid)):
        for j in range(len(coin_fee.txid)):
            if txid[i] == coin_fee.txid[j]:
                unix.append(coin_fee.unix[j])
                datetime.append(coin_fee.date[j] + coin_fee.time[j])
                balance.append(coin_fee.balance[j])
    balance_history = [unix, datetime, balance]
    return balance_history
def get_event(url):
    name = []
    info = []
    datetime = []
    location = []
    contact = []
    phonenum = []
    category = []
    eventtype = []
    content = get_page(url)
    eventdetails = geteventname(content)
    name.append(eventdetails[0])
    content = eventdetails[1]
    eventdetails = geteventinfo(content)
    info.append(eventdetails[0])
    content = eventdetails[1]
    eventdetails = getdatetime(content)
    datetime.append(eventdetails[0])
    content = eventdetails[1]
    eventdetails = getlocation(content)
    location.append(eventdetails[0])
    content = eventdetails[1]
    eventdetails = getcontact(content)
    contact.append(eventdetails[0])
    content = eventdetails[1]
    eventdetails = getphonenum(content)
    phonenum.append(eventdetails[0])
    content = eventdetails[1]
    eventdetails = getcategory(content)
    category.append(eventdetails[0])
    content = eventdetails[1]
    eventdetails = geteventtype(content)
    eventtype.append(eventdetails[0])
    content = eventdetails[1]
    alldata = [name, info, datetime, location, contact, phonenum, category, eventtype]
    # 'f' is not defined here, so it must be an already-open CSV file object from the enclosing scope
    writer = csv.writer(f, delimiter=',')
    writer.writerows([alldata])
def tor_vr(start, end, code, para):
    sql = "select datetime,close,tor,vr,vol from t_daydata where datetime between '" + start + "' and '" + end + "' and code = '" + code + "'"
    title = code + " [" + start + "," + end + " ] " + str(para)
    datetime = []
    close = []
    tor = []
    vr = []
    vol = []
    for row in hq._excutesql(sql):
        datetime.append(row[0])
        close.append(row[1])
        tor.append(row[2])
        vr.append(row[3])
        vol.append(row[4] / 100000)
    # Create the figure and subplots
    fig, ax = plt.subplots()
    #fig.subplots_adjust(bottom=0.2)
    plt.figure(1, figsize=(150, 130))
    plt.subplot(212)
    # Use datetime values for the X-axis ticks
    ax.xaxis_date()
    plt.title(title)
    plt.xticks()  # pd.date_range(start, end))
    plt.yticks()
    #plt.xlabel("BLACK close,YELLOW tor,GREEN vr,BLUE vol")
    plt.ylabel("")
    #plt.plot(datetime, close, color='black')
    plt.plot(datetime, tor, color='yellow')
    plt.plot(datetime, vr, color='red')
    plt.xlabel("YELLOW tor,RED vr")
    #plt.plot(datetime, vol, color='blue')
    plt.grid()
    plt.subplot(221)
    plt.plot(datetime, vol, color='blue')
    plt.xlabel("BLUE vol")
    plt.grid()
    plt.subplot(222)
    plt.plot(datetime, close, color='black')
    plt.xlabel("BLACK close")
    plt.grid()
    fig.set_size_inches(15, 10)
    path = "C:/image/" + c.DATE.replace("-", "") + "/"
    if not os.path.exists(path):
        os.mkdir(path)
    plt.savefig(path + title[:6] + ".jpg")
    #plt.show()
    plt.close()
def his_data(req):
    # Initialise the response data
    response = []  # light intensity values
    datetime = []  # timestamps
    # Fetch every row via the objects model manager's all(), the equivalent of SELECT * FROM in SQL
    list = HisData.objects.all()
    for var in list:
        response.append(float(var.Light_intensity))
        datetime.append(str(var.Time))
    return render_to_response("his_data.html", {
        'list': list,
        'lux': json.dumps(response),
        'dtime': json.dumps(datetime)
    })
def arrival_datetime_list(self, data):
    """ Retrieve a list which contains the arrival datetimes """
    datetime = []
    try:
        for path, _, node in jxmlease.parse(
                data,
                generator="tir38:TravelItinerary/tir38:ItineraryInfo/tir38:ReservationItems/tir38:Item/tir38:FlightSegment"
        ):
            datetime1 = node.get_xml_attr('ArrivalDateTime')
            datetime.append(str(datetime1))
    except Exception:
        datetime = ['N/A']
    return datetime
def capture_tweet_data(tweets):
    # tweets is a list containing a number of BS4 tag objects
    user = []
    datetime = []
    tweet_text = []
    comments = []
    retweets = []
    likes = []
    tweet_url = []
    for index, tweet in enumerate(tweets):
        print("fetching tweet number", index)
        user.append(tweet.select_one("div[class='css-1dbjc4n r-18u37iz r-dnmrzs']").text)
        if tweet.select_one("time") is None:
            datetime.append("no datetime found")
        else:
            datetime.append(tweet.select_one("time")['datetime'])
        if tweet.select_one("div[class='css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0']") is None:
            tweet_text.append("no text found - could be vid or pic only")
        else:
            tweet_text.append(
                tweet.select_one("div[class='css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0']").text)
        social = tweet.select("div[class='css-1dbjc4n r-1iusvr4 r-18u37iz r-16y2uox r-1h0z5md']")
        comments.append(social[0].text)
        retweets.append(social[1].text)
        likes.append(social[2].text)
        try:
            tweet_url.append(
                tweet.select_one("a[class='css-4rbku5 css-18t94o4 css-901oao r-1re7ezh r-1loqt21 r-1q142lx r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-3s2u2q r-qvutc0']")['href'])
        except Exception:
            tweet_url.append("no url found")
    kle = pd.DataFrame(
        list(zip(user, datetime, tweet_text, comments, retweets, likes, tweet_url)),
        columns=["sender", "datetime", "text", "comments", "retweets", "likes", "tweet_url"])
    return kle
def market_index_kdd(self):
    Data = DataFrame()
    url_dict = {'미국 USD': 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW',
                '일본 JPY': 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_JPYKRW',
                '유럽연합 EUR': 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_EURKRW',
                '중국 CNY': 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_CNYKRW'}
    for key in url_dict.keys():
        date = []
        value = []
        for i in range(1, 1000):
            url = re.get(url_dict[key] + '&page=%s' % i)
            url = url.content
            html = BeautifulSoup(url, 'html.parser')
            tbody = html.find('tbody')
            tr = tbody.find_all('tr')
            '''Keep paging until the last page is reached'''
            if len(tbody.text.strip()) > 3:
                for r in tr:
                    temp_date = r.find('td', {'class': 'date'}).text.replace('.', '-').strip()
                    temp_value = r.find('td', {'class': 'num'}).text.strip()
                    date.append(temp_date)
                    value.append(temp_value)
            else:
                temp = DataFrame(value, index=date, columns=[key])
                Data = pd.merge(Data, temp, how='outer', left_index=True, right_index=True)
                print(key + '자료 수집 완료')
                time.sleep(10)
                break
    print('=================== test 2 =================')
    # Data.to_csv('%s/market_index.csv' % (my_folder))
    Data.to_csv('exchange_index.csv', encoding='utf-8-sig')
    print('==================== 환율 ok ============================')
    print(Data)
    return Data
def discretizeData(input_data, output_data):
    trans = preprocessing.LabelEncoder()
    output_data = trans.fit_transform(output_data)
    datetime, day, time = [], [], []
    for inp in input_data:
        datetime.append(inp[0])
        day.append(inp[1])
        time.append(inp[2])
    day = trans.fit_transform(day)
    time = trans.fit_transform(time)
    for i in range(len(input_data)):
        sensor_values = input_data[i][3:]
        input_data[i] = [datetime[i], day[i], time[i]]
        for value in sensor_values:
            input_data[i].append(value)
    return input_data, output_data
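# A minimal sketch (not part of the original code) of what the LabelEncoder calls
# above do to the categorical day/time columns; the example values are hypothetical.
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
print(le.fit_transform(['Mon', 'Tue', 'Mon', 'Wed']))  # -> [0 1 0 2]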
def pair_GPSGRACE(GPS_TS, GRACE_TS):
    # This resamples the GRACE data to match GPS that is within the range of GRACE, and forms a common time axis.
    gps_decyear = get_float_times(GPS_TS.dtarray)
    decyear = []
    dt = []
    north_gps = []
    east_gps = []
    vert_gps = []
    N_err = []
    E_err = []
    V_err = []
    u = []
    v = []
    w = []
    for i in range(len(GPS_TS.dtarray)):
        # this if-statement is happening because GPS is more current than GRACE
        if GPS_TS.dtarray[i] > min(GRACE_TS.dtarray) and GPS_TS.dtarray[i] < max(GRACE_TS.dtarray):
            decyear.append(gps_decyear[i])
            dt.append(GPS_TS.dtarray[i])
            north_gps.append(GPS_TS.dN[i])
            east_gps.append(GPS_TS.dE[i])
            vert_gps.append(GPS_TS.dU[i])
            N_err.append(GPS_TS.Sn[i])
            E_err.append(GPS_TS.Se[i])
            V_err.append(GPS_TS.Su[i])
    grace_u = np.interp(decyear, GRACE_TS.decyear, GRACE_TS.u)
    grace_v = np.interp(decyear, GRACE_TS.decyear, GRACE_TS.v)
    grace_w = np.interp(decyear, GRACE_TS.decyear, GRACE_TS.w)
    my_paired_ts = Paired_TS(dtarray=dt, north=north_gps, east=east_gps, vert=vert_gps,
                             N_err=N_err, E_err=E_err, V_err=V_err,
                             u=grace_u, v=grace_v, w=grace_w)
    return my_paired_ts
def market_index_crawling():
    Data = DataFrame()
    url_dict = {'미국 USD': 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW',
                '국제 금': 'http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_GC&fdtc=2'}
    for key in url_dict.keys():
        date = []
        value = []
        for i in range(1, 1000):
            url = re.get(url_dict[key] + '&page=%s' % i)
            url = url.content
            html = BeautifulSoup(url, 'html.parser')
            tbody = html.find('tbody')
            tr = tbody.find_all('tr')
            '''Keep paging until the last page is reached'''
            if len(tbody.text.strip()) > 3:
                for r in tr:
                    temp_date = r.find('td', {'class': 'date'}).text.replace('.', '-').strip()
                    temp_value = r.find('td', {'class': 'num'}).text.strip()
                    date.append(temp_date)
                    value.append(temp_value)
            else:
                temp = DataFrame(value, index=date, columns=[key])
                Data = pd.merge(Data, temp, how='outer', left_index=True, right_index=True)
                print(key + '자료 수집 완료')
                time.sleep(10)
                break
    Data.to_csv('%s/market_index.csv' % (folder_adress))
    return Data
def useASOS(fname, RT):
    errorFile = open('%s_use_error_log.txt' % fname.split('.')[0], 'w')
    dataFile = open(fname, 'r')
    STA = fname.split('.')[0]
    dt = []
    wind = []
    bp = []
    tdh = []
    p = []
    for x in dataFile:
        dt.append(dateASOS(x))
        wind.append(windASOS(x, errorFile))
        bp.append(baroASOS(x, errorFile))
        tdh.append(tempASOS(x, errorFile))
        p.append(rainASOS(x))
    dataFile.close()
    errorFile.close()
    return dt, p, wind, bp, tdh
def other(self):
    sql = "select consume_time,sum(fee) fee,b.name from fee_record a,member b \
        where a.member_id = b.id group by consume_time,name order by consume_time"
    dateList = db.query(sql).list()
    datetime = []
    categories = {}
    dict1 = {}
    dict2 = {}
    for d in dateList:
        datetime.append(d.consume_time)
    categories["categories"] = datetime
    data1 = [12, 20]
    data2 = [20, 30]
    dict1["name"] = "张三"
    dict1["data"] = data1
    dict2["name"] = "李死"
    dict2["data"] = data2
    data = [dict1, dict2]
    categories["series"] = data
    web.header("Content-Type", "application/json")
    return json.dumps(categories, default=dthandle)
def search_next_empty_reserves_from_emptystate(cfg, cookies, headers, form_data):
    """ Search for open reservations given the desired date/time, purpose of use, and area """
    global http_req_num
    global page_unit
    # Get the year, month, day and start time from the form data
    datetime = []
    datetime.append(str(form_data['layoutChildBody:childForm:year']))
    datetime.append(str(form_data['layoutChildBody:childForm:month']))
    datetime.append(str(form_data['layoutChildBody:childForm:day']))
    datetime.append(str(form_data['layoutChildBody:childForm:stime']))
    datetime.append(str(form_data['layoutChildBody:childForm:offset']))
    # Modify the form data
    # Change the value of doPager to 'submit'
    form_data['layoutChildBody:childForm:doPager'] = 'submit'
    # Remove form data entries that are not needed
    ## Remove the "add to reservation cart" entries
    for _index in range(page_unit - 1):
        index_string_doAddCart = f'layoutChildBody:childForm:rsvEmptyStateItems:{_index}:doAddCart'
        #print(f'delete formdata: {index_string_doAddCart}')
        del form_data[f'{index_string_doAddCart}']
    # Remove the "review reservation cart contents" entry
    del form_data['layoutChildBody:childForm:jumpRsvCartList']
    #print(form_data)
    # URL-encode the form data to be sent in the POST request
    params = urllib.parse.urlencode(form_data)
    # Search for open reservations using the form data
    response = requests.post(cfg['empty_state_url'], headers=headers, cookies=cookies, data=params)
    http_req_num += 1
    # Save the response as an HTML file for debugging
    _datetime_string = str(datetime[0]) + str(datetime[1]).zfill(2) + str(datetime[2]).zfill(2) + str(datetime[3]).zfill(2) + str(datetime[4]).zfill(2)
    _file_name = f'result_{_datetime_string}.html'
    #print(_file_name)
    _file = reserve_tools.save_html_to_filename(response, _file_name)
    # Return the response
    return response
def LevenshteinDistance(string1, string2):
    n1 = len(string1)
    n2 = len(string2)
    # NOTE: the two early returns yield only the distance, while the normal path
    # below returns the tuple (distance, d, n1, n2)
    if string1 == string2:
        return 0
    if n1 == 0 or n2 == 0:
        return n1 + n2
    # The d matrix will hold the Levenshtein distances
    d = []
    for i in range(n1 + 1):
        row = []
        for j in range(n2 + 1):
            if i == 0 or j == 0:
                row.append(i + j)  # target prefixes
            else:
                if string1[i - 1] == string2[j - 1]:  # same letter, no operation
                    row.append(d[i - 1][j - 1])
                else:
                    minimo = min(d[i - 1][j] + 1,      # a deletion
                                 row[j - 1] + 1,       # an insertion
                                 d[i - 1][j - 1] + 1)  # a substitution
                    row.append(minimo)
        d.append(row)
    return d[n1][n2], d, n1, n2
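# A quick usage sketch for the LevenshteinDistance function above (not part of the
# original code): the classic pair "kitten"/"sitting" has edit distance 3.
distance, d, n1, n2 = LevenshteinDistance("kitten", "sitting")
print(distance)  # 3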
    'rmax': rmax_slice,
    'vmax': vmax_slice,
    'roci': roci_slice,
    'roci_miles': roci_slice_miles,
    'lat': lat_slice,
    'lon': lon_slice,
    'pc': pc_slice,
    'b': b_slice
})
hurricane_df_clean = hurricane_df[hurricane_df.rmax != 0]
datetime = []
for length in range(1, len(hurricane_df_clean), 4):
    datetime.append(int(hurricane_df_clean['datetime'].values[length].item()))
conn_string = "dbname='hamlethurricane' user=postgres port='5432' host='127.0.0.1' password='******'"
try:
    conn = psycopg2.connect(conn_string)
except Exception as e:
    print(str(e))
    sys.exit()
impact_cur = conn.cursor()
for key in range(1, (len(datetime) - 1)):
    sql = """create or replace view vw_county_impact_{} as select a.ctfips, avg(b."Windspeed")
import sqlite3
import json
import numpy as np
import matplotlib.pyplot as plt

conn = sqlite3.connect("sensehat.db")
cursor = conn.cursor()
sql = "select * from temp_humid;"
cursor.execute(sql)
result = cursor.fetchall()
temp = []
humid = []
datetime = []
for tuple in result:
    temp.append(tuple[1])
    humid.append(tuple[2])
    date = tuple[3]
    dates = int(date[8:10])
    datetime.append(dates)
print(temp)
fig = plt.figure()
plt.plot(temp)
plt.plot(humid)
plt.xlabel('x-Axis')
plt.ylabel('Temperature and Humidity')
plt.title('Temperature & Humidity Reading')
plt.show()
fig.savefig('Temperature_Humidity.png')
conn.close()
    ren_check = important_data[1][x] in renewables  # checks to see if the technology is a renewable
    if (ren_check == True):
        renewable_techs.append(important_data[1][x])
        sizesax3.append(data[0][x])
labelax2 = ["Renewables", "Non-Renewables"]
# percentage of renewable usage
sizesax2 = [float(data[2][0]), 100 - float(data[2][0])]
current_time = str(datetime.datetime.now())  # find the current date and time
datetime = []
for x in range(0, 4, 1):  # split the problem into 4 parts: day, month, year and time
    if (x == 0):
        datetime.append(current_time[8] + current_time[9] + "/")
    if (x == 1):
        datetime.append(current_time[5] + current_time[6] + "/")
    if (x == 2):
        datetime.append(current_time[0] + current_time[1] + current_time[2] + current_time[3])
    if (x == 3):
        datetime.append(" " + current_time[11] + current_time[12] + current_time[13] + current_time[14] + current_time[15])
datetime = ''.join(datetime)  # join all the elements of the array together without spaces ('')
fig = plt.figure()
# Renewable techs
def warframe_crawling(item, path, path_0):
    get_item = item
    get_path = path
    get_path_0 = path_0
    site = 'https://api.warframe.market/v1/items/{get_item}/statistics'.format(get_item=get_item)
    res = requests.get(site)
    html = res.text
    soup = bs(html, 'html.parser')
    with open('/home/ec2-user/environment/warframets/data/json/warframe_data_v2.json', 'w') as file:
        data = str(soup)
        json_data = json.loads(data)
        json_data_1 = json.dumps(json_data, indent=4)
        file.write(json_data_1)
    warframe_data = json_data_1
    json_data = json.loads(warframe_data)
    result_data = pd.DataFrame(json_data['payload']['statistics_closed']['90days'])
    datetime = []
    avg_price = []
    volume = []
    for i in result_data['datetime']:
        datecut = str(i)
        datetime.append(datecut[0:10])
    for i in result_data['moving_avg']:
        avg_price.append(str(i))
    for i in result_data['volume']:
        volume.append(str(i))
    all_data_list = pd.DataFrame({'datetime': datetime, 'avg_price': avg_price, 'volume': volume})

    def make_file(item, path):
        get_item = item
        get_path = path
        if os.path.isfile(get_path):
            all_data_list.to_csv(get_path, mode='a', header=False)
            re_result = pd.read_csv(get_path, index_col=0, error_bad_lines=False)
            all_result = re_result.drop_duplicates('datetime', keep='first')
            all_result.to_csv(get_path, mode='w')
            value = pandas_value.pandas_value(get_item, 'warframe')
            value.to_csv(get_path, mode='w')
            # print('Data update complete.')
        else:
            all_data_list.to_csv(get_path, mode='w')
            value = pandas_value.pandas_value(get_item, 'warframe')
            value.to_csv(get_path, mode='w')
            # print('New data saved.')

    if os.path.isdir(get_path_0):
        make_file(get_item, get_path)
    else:
        # print('The folder did not exist, so it was created.')
        os.makedirs(get_path_0)
        make_file(get_item, get_path)
    print(str(get_item) + ' 업데이트를 하였습니다.')
def main():
    st.set_option('deprecation.showfileUploaderEncoding', False)
    st.title('Análise mensagens Whatsapp')
    st.write('Obs: Nenhuma mensagem será salva ou utilizada, sua privacidade está totalmente segura.')
    st.write('Essa aplicação se trata de um código aberto que pode ser encontrado no Github: ')
    st.write('Meu LinkedIn: https://www.linkedin.com/in/andr%C3%A9-elias/')
    st.text("__________________________________________________________________________________________")
    nltk.download('stopwords')
    st.write('Como conseguir o arquivo da conversa:')
    st.image('export.jpeg', width=250)
    st.write('''Dentro da conversa aperte os '...' e depois clique em Exportar Conversa (SEM MÍDIA)''')
    arquivoConversa = st.file_uploader('FAÇA O UPLOAD AQUI')
    if arquivoConversa is not None:
        pat = re.compile(r'^(\d\d\/\d\d\/\d\d\d\d.*?)(?=^^\d\d\/\d\d\/\d\d\d\d|\Z)', re.S | re.M)
        with arquivoConversa as f:
            data = [m.group(1).strip().replace('\n', ' ') for m in pat.finditer(f.read())]
        data.pop(0)
        sender = []
        message = []
        datetime = []
        for row in data:
            datetime.append(row.split(' - ')[0])
            try:
                s = re.search('- (.*?):', row).group(1)
                sender.append(s)
            except:
                sender.append('')
            try:
                message.append(row.split(': ', 1)[1])
            except:
                message.append('')
        df = pd.DataFrame(zip(datetime, sender, message), columns=['datetime', 'sender', 'message'])
        df['datetime'] = pd.to_datetime(df.datetime, format='%d/%m/%Y %H:%M')
        df['date'] = df['datetime'].dt.date
        df['time'] = df['datetime'].dt.time
        df['weekDay'] = df['datetime'].dt.dayofweek
        df['timeHour'] = df['datetime'].dt.hour
        df['weekDay'] = df['weekDay'].replace({0: 'Segunda', 1: 'Terça', 2: 'Quarta', 3: 'Quinta',
                                               4: 'Sexta', 5: 'Sábado', 6: 'Domingo'})
        df['message'] = df['message'].replace({'<Arquivo de mídia oculto>': '-MÍDIA-'})
        names = df['sender'].unique()
        yourName = names[0]
        hisName = names[1]
        numMessage = df.groupby(['sender'])['message'].count().reset_index()
        st.text("__________________________________________________________________________________________")
        st.subheader('Distribuição de mensagens')
        plt.figure(figsize=(15, 4))
        ax = sns.barplot(x="message", y="sender", data=numMessage)
        ax.set(xlabel='Mensagens enviadas', ylabel='Remetente')
        sns.set(style="white", context="talk")
        st.pyplot()
        df['characters'] = df.message.apply(len)
        df['words'] = df.message.apply(lambda x: len(x.split()))
        textMean = df.groupby(['sender'])['characters', 'words'].mean().round(2).reset_index()
        ax = sns.barplot(x="characters", y="sender", data=textMean)
        ax.set(xlabel='Média de caracteres por msg', ylabel='Remetente')
        st.pyplot()
        ax = sns.barplot(x="words", y="sender", data=textMean)
        ax.set(xlabel='Média de palavras por msg', ylabel='Remetente')
        st.pyplot()
        numMessageDay = df.groupby(['date'])['message'].count().reset_index()
        st.text("__________________________________________________________________________________________")
        st.subheader('Distribuição tempo')
        plt.figure(figsize=(15, 4))
        ax = sns.lineplot(data=numMessageDay, x="date", y="message", linewidth=5)
        ax.set(xlabel='Data', ylabel='Mensagens por dia')
        plt.setp(ax.get_xticklabels(), rotation=45)
        st.pyplot()
        numMessageHour = df.groupby(['timeHour'])['message'].count().reset_index()
        plt.figure(figsize=(7, 7))
        ax = sns.barplot(data=numMessageHour, x="timeHour", y="message")
        ax.set(xlabel='Hora do dia', ylabel='Mensagens')
        sns.set(style="white", context="talk")
        st.pyplot()
        numMessageWeek = df.groupby(['weekDay'])['message'].count().reset_index()
        numMessageWeek['weekDay'] = pd.Categorical(numMessageWeek['weekDay'],
                                                   categories=['Segunda', 'Terça', 'Quarta', 'Quinta',
                                                               'Sexta', 'Sábado', 'Domingo'],
                                                   ordered=True)
        plt.figure(figsize=(15, 4))
        sns.set(style="white", context="talk")
        ax = sns.barplot(data=numMessageWeek, x="message", y="weekDay")
        ax.set(xlabel='Dia da semana', ylabel='Mensagens')
        st.pyplot()
        yourWords = []
        hisWords = []
        for x in range(len(df['sender'])):
            if df['sender'][x] == yourName:
                yourWords.append(df['message'][x])
            elif df['sender'][x] == hisName:
                hisWords.append(df['message'][x])
        st.text("__________________________________________________________________________________________")
        st.subheader('Mensagens em números')
        st.write('Total Mensagens: ', len(yourWords) + len(hisWords))
        st.write('Suas mensagens: ', len(yourWords))
        st.write('Mensagens do outro: ', len(hisWords))
        s = ' '
        totalYourWords = s.join(yourWords)
        totalHisWords = s.join(hisWords)
        pattern = re.compile('k*|-MÍDIA-|Kk*')
        totalYourWords = pattern.sub('', totalYourWords)
        totalHisWords = pattern.sub('', totalHisWords)
        stopWords = stopwords.words('portuguese')
        newStop = ['pra', 'tô', 'aí', 'tá', 'então', 'deu', 'aqui', 'né', 'vou', 'bem', 'coisa', 'tmb', 'vai']
        for x in newStop:
            stopWords.append(x)
        mapaCores = ListedColormap(['red', 'magenta', 'blue', 'green'])
        mask = np.array(Image.open('mask-cloud.png'))
        st.text("__________________________________________________________________________________________")
        st.subheader('Nuvem de palavras')
        nuvem = WordCloud(width=1000, height=600, background_color='white', colormap=mapaCores,
                          stopwords=stopWords, max_words=60, mask=mask)
        nuvem.generate(totalYourWords)
        plt.figure(figsize=(10, 10))
        plt.imshow(nuvem)
        st.pyplot()
        mapaCores = ListedColormap(['red', 'magenta', 'blue', 'green'])
        nuvem = WordCloud(width=1000, height=600, background_color='white', colormap=mapaCores,
                          stopwords=stopWords, max_words=60, mask=mask)
        nuvem.generate(totalHisWords)
        plt.figure(figsize=(10, 10))
        plt.imshow(nuvem)
        st.pyplot()
        st.text("__________________________________________________________________________________________")
        st.subheader('Emojis')
        yourEmoji = list(''.join(c for c in totalYourWords if c in emoji.UNICODE_EMOJI))
        countYourEmoji = {i: yourEmoji.count(i) for i in yourEmoji}
        hisEmoji = list(''.join(c for c in totalHisWords if c in emoji.UNICODE_EMOJI))
        countHisEmoji = {i: hisEmoji.count(i) for i in hisEmoji}
        dfYourEmoji = pd.DataFrame(countYourEmoji.items(), columns=['Emoji', 'Count'])
        dfYourEmoji = dfYourEmoji.sort_values(by=['Count'], ascending=False)
        st.table(dfYourEmoji)
        dfHisEmoji = pd.DataFrame(countHisEmoji.items(), columns=['Emoji', 'Count'])
        dfHisEmoji = dfHisEmoji.sort_values(by=['Count'], ascending=False)
        st.table(dfHisEmoji)
def market_index_crawling(self):
    folder_adress = '.'
    Data = DataFrame()
    url_dict = {
        '미국 USD': 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_USDKRW',
        '일본 JPY': 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_JPYKRW',
        '유럽연합 EUR': 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_EURKRW',
        '중국 CNY': 'http://finance.naver.com/marketindex/exchangeDailyQuote.nhn?marketindexCd=FX_CNYKRW',
        'WTI': 'http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=OIL_CL&fdtc=2',
        '국제 금': 'http://finance.naver.com/marketindex/worldDailyQuote.nhn?marketindexCd=CMDT_GC&fdtc=2'
    }
    for key in url_dict.keys():
        date = []
        value = []
        for i in range(1, 1000):
            url = re.get(url_dict[key] + '&page=%s' % i)
            url = url.content
            html = BeautifulSoup(url, 'html.parser')
            tbody = html.find('tbody')
            tr = tbody.find_all('tr')
            '''Keep paging until the last page is reached'''
            if len(tbody.text.strip()) > 3:
                for r in tr:
                    temp_date = r.find('td', {'class': 'date'}).text.replace('.', '-').strip()
                    temp_value = r.find('td', {'class': 'num'}).text.strip()
                    date.append(temp_date)
                    value.append(temp_value)
            else:
                temp = DataFrame(value, index=date, columns=[key])
                Data = pd.merge(Data, temp, how='outer', left_index=True, right_index=True)
                print(key + '자료 수집 완료')
                time.sleep(10)
                break
    Data.columns = ['미국_USD', '일본_JPY', '유럽연합_EUR', '중국_CNY', 'WTI', '국제_금']
    # Convert the DataFrame index back into a date column
    #Data.columns = Data.columns.droplevel()
    Data = Data.reset_index()
    Data.rename(columns={'index': '일자'}, inplace=True)
    Data.to_csv('%s/market_world_index.csv' % folder_adress)
    # Save to the DB
    from sqlalchemy import create_engine
    # With echo=True, the actual table-creation queries are printed
    engine = create_engine('sqlite:///itm_master.db', echo=True)
    # 1. Connect to the SQLite DB
    # Open a connection so the data can be stored in SQLite
    con = sqlite3.connect("./itm_master.db")
    cursor = con.cursor()
    # DB CREATE
    cursor.execute("drop table market_world_index ")
    cursor.execute("create table market_world_index (일자,미국_USD,일본_JPY,유럽연합_EUR,중국_CNY,WTI,국제_금)")
    cursor.execute("delete from market_world_index ")  # clear it and start over
    con.commit()
    # 2. Save the rows to the DB
    # SQL statements
    for ix, r in Data.iterrows():
        # print(r)
        values = u"('%s','%s','%s','%s','%s','%s','%s')" % (
            r['일자'], r['미국_USD'], r['일본_JPY'], r['유럽연합_EUR'], r['중국_CNY'], r['WTI'], r['국제_금'])
        insert_sql = u"insert into market_world_index( 일자,미국_USD,일본_JPY,유럽연합_EUR,중국_CNY,WTI,국제_금 ) values %s ;" % (u"".join(values))
        print(insert_sql)
        con.execute(insert_sql)
    con.commit()
    return Data
lt = []
for i in range(len(dl)):
    t2 = []
    l2 = []
    step = 7
    t = [dl[i][j:j + step] for j in range(0, len(dl[i])) if len(dl[i][j:j + step]) == step]
    l = [list(map(itemgetter(0), i)) for i in t]
    for h in range(len(t) - step):
        dataall.append([m for m in t[h]])
        label.append(max(l[h + step]))
        t2.append([m for m in t[h]])
        l2.append(max(l[h + step]))
    dt.append(t2)
    lt.append(l2)

# In[72]:

dl2 = [i.values.tolist() for i in datanew2]
dataall2 = []
label2 = []
dt2 = []
lt2 = []
for i in range(len(dl2)):
    t2 = []
    l2 = []
    step = 7
    t = [
        dl2[i][j:j + step]
        for j in range(0, len(dl2[i]))
def identify_nearest_puv():
    # for testing
    lat = 14.64887259
    lon = 121.06900107
    destination = (lon, lat)
    destination_segment = locate_segment(coordinates=destination)
    print('Destination Segment: ', destination_segment)

    # read route data
    with open("ikot_route_test.json", "r") as read_file:
        route = json.load(read_file)

    # store information in the following lists
    PUV_id = []
    segment_id = []
    datetime = []
    location = []
    segs_before_dest = []

    # query all segments
    for segment in route:
        # use midpoint as reference
        midpoint_latitude = segment["midpoint"][0]
        midpoint_longitude = segment["midpoint"][1]
        seg_length = segment["length"]

        # fetch data of cars near the segment
        NIMPA_URL = 'https://nimpala.me'
        NIMPA_CREDENTIALS = ('root', 'root')
        # Sean - not using &time=[seconds_elapsed] URL parameter.
        rest_operation = '/latest_area?lat={}&lon={}&time={}&radius={}'.format(
            str(midpoint_latitude), str(midpoint_longitude), 15, seg_length)
        complete_URL = NIMPA_URL + rest_operation
        # print('--- SENDING REQUEST TO:', complete_URL, ' ----')
        response = requests.get(complete_URL, auth=NIMPA_CREDENTIALS)
        # print(response)

        # parse the fetched data
        data = 0
        try:
            data = response.json()
        except Exception as e:
            pass
        data = response.text
        print(data)
        parsed = json.loads(data)

        # iterate through lists of PUV data
        if len(parsed) > 0:
            for PUV_data in parsed:
                PUV_id.append(PUV_data['vehicle_id'])
                segment_id.append(segment['segment_id'])
                datetime.append(PUV_data['datetime']['$date'])
                location.append(PUV_data['geojson']['coordinates'])
                # compute distance wrt destination segment
                segment_distance = destination_segment - segment['segment_id']
                if segment_distance < 0:
                    segment_distance = len(route) + segment_distance
                segs_before_dest.append(segment_distance)

    # convert PUV information dict to pandas DataFrame
    PUV_info_dict = {'PUV_id': PUV_id, 'segment_id': segment_id, 'datetime': datetime,
                     'location': location, 'segs_before_dest': segs_before_dest}
    PUV_df = pd.DataFrame.from_dict(PUV_info_dict)
    PUVs_to_compute_eta = []
    if len(PUV_df) > 0:
        # group by PUV_id, sort by datetime
        df_by_PUV_id = PUV_df.groupby('PUV_id', as_index=False) \
            .apply(lambda x: x.nlargest(1, columns=['datetime'])) \
            .reset_index(level=1, drop=1)
        # .reset_index()
        # df_by_PUV_id = PUV_df.groupby('PUV_id').apply(pd.DataFrame.sort_values, 'datetime')
        # df_by_PUV_id = PUV_df.groupby('PUV_id').apply(lambda x: x.sort_values(['datetime'])).reset_index(drop=True)
        # print(df_by_PUV_id.loc[0])
        # for name in df_by_PUV_id.index:
        #     print(name)
        #     print('segment: ', df_by_PUV_id['segment_id'].loc[name])
        #     print('location: ', df_by_PUV_id['location'].loc[name])
        #     print('no. of segments before dest: ', df_by_PUV_id['segs_before_dest'].loc[name])
        # print(df_by_PUV_id.head())

        # create new dataframe from aggregated groups
        df_by_PUV_id.index.name = None
        df_by_PUV_id.columns = ['PUV_id', 'segment_id', 'datetime', 'location', 'segs_before_dest']
        top_puvs = df_by_PUV_id.sort_values('segs_before_dest').reset_index(drop=1).head(3)
        PUVs_to_compute_eta = top_puvs['PUV_id'].tolist()
        locs_to_vizualize = top_puvs['location'].tolist()
        print(top_puvs['location'].tolist())
    return PUVs_to_compute_eta
# hourly domain basemaps; this takes lots of time if doing hourly. Switching to daily could be prudent over a long timespan
############################################
# save maps into the pdf file (two maps in a single page)
for i, sp in enumerate(var_list):
    t_days = int(len(airpact[sp]) / 24)
    temp = np.empty((t_days, 90, 90), '|U18')
    df_daily = {}
    for t in range(0, t_days):
        # Do daily average and MD8HA
        if sp == 'PMIJ':
            days = t
            t = t * 24
            # 24-hour mean for this day (written compactly; equivalent to the original explicit 24-term sum)
            temp[days] = sum(airpact[sp][t + k, :, :] for k in range(24)) / 24
            df_daily[sp] = temp
            datetime.append(airpact['DateTime'][t, 0, 0])
        else:
            days = t
            t = t * 24
            temp[days] = sum(airpact[sp][t + k, :, :] for k in range(24)) / 24
            df_daily[sp] = temp
            datetime.append(airpact['DateTime'][t, 0, 0])
    df_daily[sp] = df_daily[sp].astype(np.float)
    for t in range(0, len(df_daily[sp])):
        plt.style.use("dark_background")
        outpng = base_dir + 'maps/daily_basemap/airpact_daily_basemap_tiled_' + sp + '_%05d.png' % t
        print(outpng)
        pm_max = 35
datetime = []
description = []
for source in soup.find_all(
        'ytd-video-renderer',
        class_='style-scope ytd-expanded-shelf-contents-renderer',
        limit=10):
    title.append(source.find_all('a', {'id': 'video-title'})[0].string)
    channel.append(source.find_all('a', class_='yt-simple-endpoint style-scope yt-formatted-string')[0].string)
    view.append(source.find_all('span', class_='style-scope ytd-video-meta-block')[0].string)
    datetime.append(source.find_all('span', class_='style-scope ytd-video-meta-block')[1].string)
    description.append(source.find_all('yt-formatted-string', {'id': 'description-text'})[0].string)
driver.close()
youtube_output = []
for i in range(10):
    youtube_output.append(
        str(i + 1) + ". ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ \n" + str(title[i].strip()) + " \n" +
        str(channel[i]) + " | " + str(view[i]) + " | " + str(datetime[i]) + " \n\n" +
        str(description[i]) + " \n\n")
token = 'your_token'
slack = Slacker(token)
def _excread(path, encoding="utf-8"):
    """Don't call this directly, use excread() instead."""
    logger = logging.getLogger('glodap.util.excread')
    skipfooter = 0
    first = True
    signature = ''
    file_type = ''
    column_headers = []
    column_units = []
    line = None
    headerlines = 0
    comments = ""
    sampl_depth_columns = [
        'CTDDEPTH',
        'CTDDEP',
        'CTDPRS',
    ]
    # Loop over the header to collect metadata and remove file type info
    with open(path, encoding=encoding) as excfile:
        while True:
            headerlines += 1
            line = excfile.readline().strip()
            # Get the file type and signature
            if (
                    first
                    and (line.startswith('CTD') or line.startswith('BOTTLE'))
            ):
                first = False
                matches = re.search('((BOTTLE)|(CTD))[, ](.*)$', line)
                signature = matches.group(4)
                file_type = matches.group(1)
                continue
            # ignore empty lines
            elif not line.strip():
                continue
            # Keep comments as metadata
            elif line.startswith('#'):
                comments += line + "\n"
                continue
            else:
                # Register header lines
                if line.startswith('EXPOCODE'):
                    column_headers = [s.strip() for s in line.split(',')]
                elif line.startswith(',,,'):
                    column_units = [s.strip() for s in line.split(',')]
                else:
                    break
    with FileReadBackwards(path, encoding=encoding) as fin:
        for line in fin:
            skipfooter += 1
            if line.strip() == 'END_DATA':
                break
    data_types = {
        'EXPOCODE': str,
        'SECT_ID': str,
        'DATE': str,
        'TIME': str,
    }
    dataframe = pd.read_csv(
        path,
        names=column_headers,
        dtype=data_types,
        skiprows=headerlines,
        skipfooter=skipfooter,
        engine='python',
        encoding=encoding,
    )
    # Strip leading and trailing whitespaces from string columns
    df_obj = dataframe.select_dtypes(['object'])
    dataframe[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
    # If 'TIME' is not present but 'HOUR' and 'MINUTE' are, then make time :)
    if (not 'TIME' in dataframe.columns
            and 'HOUR' in dataframe.columns
            and 'MINUTE' in dataframe.columns):
        dataframe['TIME'] = [
            f'{d.HOUR:02}{d.MINUTE:02}' for i, d in dataframe.iterrows()
        ]
    # Add a datetime column
    if 'DATE' in dataframe.columns and 'TIME' in dataframe.columns:
        datetime = []
        for ix, d in enumerate(dataframe['DATE']):
            try:
                t = dataframe['TIME'][ix]
                date = '{}-{}-{}'.format(d[:4], d[4:6], d[6:])
                time = '{}:{}'.format(t[:2], t[2:])
                datetime.append(pd.to_datetime('{} {}'.format(date, time), utc=True))
            except Exception as e:
                logger.error(
                    'Time format error (date: {}) (time: {}) on line {}'
                    .format(d, t, ix + headerlines)
                )
                raise e
        dataframe['EXC_DATETIME'] = datetime
    # Try multiple sampling depth columns
    for name in sampl_depth_columns:
        if name in dataframe.columns:
            dataframe['EXC_CTDDEPTH'] = dataframe[name]
            break
    # Replace -9999, -999, -99, -9 with np.nan
    dataframe = dataframe.replace([-9999, -999, -99, -9], np.nan)
    # Add some extra metadata to the dataframe
    dataframe.whp_exchange.column_units = column_units
    dataframe.whp_exchange.signature = signature
    dataframe.whp_exchange.file_type = file_type
    dataframe.whp_exchange.comments = comments
    return dataframe
## python file i/o
tlf = open("train_data/trainData_List.txt", "r")
a = tlf.read().split('\n')
a.pop(len(a) - 1)
a.sort(reverse=True)
for idx, val in enumerate(a):
    print(str(idx) + " : " + val)
tf = open("train_data/" + "trainData-2020-1213-120627-916633" + ".txt", "r")
dt = []
dataArr = tf.read().split("\n\n")
dataArr.pop(len(dataArr) - 1)
for data in dataArr:
    data = data.split("\n")
    dt.append(data)

## Numpy Mat Ops
A = np.array([[1, 1], [2, 3]])
B = np.array([[1, 0], [0, 1]])
print(A.T)
x = [1, 2.2, 3]
y = np.array([[3, 6, 4]]).T
pi = np.array([np.ones(len(x)).T, np.array(x).T]).T
print(y.shape)
print(pi)
print(pi.shape)
# normal-equation least-squares solve; 'inv' is presumably numpy.linalg.inv, imported elsewhere
print(inv(pi.T @ pi) @ pi.T @ y)
def _excread(excfile):
    """Don't call this directly, use excread() instead."""
    logger = logging.getLogger('glodap.util.excread')
    rewindto = 0
    first = True
    signature = ''
    file_type = ''
    column_headers = []
    column_units = []
    line = None
    headerlines = 0
    comments = ""
    # Loop over the header to collect metadata and remove file type info
    while True:
        headerlines += 1
        rewindto = excfile.tell()
        line = excfile.readline().strip()
        # Get the file type and signature
        if (
                first
                and (line.startswith('CTD') or line.startswith('BOTTLE'))
        ):
            first = False
            matches = re.search('((BOTTLE)|(CTD)),(.*)$', line)
            signature = matches.group(4)
            file_type = matches.group(1)
            continue
        # ignore empty lines
        elif not line:
            continue
        # Keep comments as metadata
        elif line.startswith('#'):
            comments += line + "\n"
            continue
        else:
            # Register header lines
            if line.startswith('EXPOCODE'):
                column_headers = line.split(',')
            elif line.startswith(',,,'):
                column_units = line.split(',')
            else:
                break
    excfile.seek(rewindto)
    data_types = {
        'EXPOCODE': str,
        'SECT_ID': str,
        'DATE': str,
        'TIME': str,
    }
    dataframe = pd.read_csv(
        excfile,
        names=column_headers,
        dtype=data_types,
    )
    # Strip leading and trailing whitespaces from string columns
    df_obj = dataframe.select_dtypes(['object'])
    dataframe[df_obj.columns] = df_obj.apply(lambda x: x.str.strip())
    # drop lines after and including END_DATA
    drop_lines = []
    for val in reversed(range(len(dataframe))):
        drop_lines.append(val)
        if 'END_DATA' in dataframe.iloc[val, 0]:
            for line in drop_lines:
                dataframe = dataframe.drop(line, axis=0)
            break
    # Add a datetime column. If time is not present, time is set to 00:00
    if 'DATE' in dataframe.columns and 'TIME' in dataframe.columns:
        datetime = []
        for ix, d in enumerate(dataframe['DATE']):
            try:
                t = dataframe['TIME'][ix]
                date = '{}-{}-{}'.format(d[:4], d[4:6], d[6:])
                time = '{}:{}'.format(t[:2], t[2:])
                datetime.append('{} {}'.format(date, time))
                pd.to_datetime(datetime)
            except Exception as e:
                logger.error(
                    'Time format error (date: {}) (time: {}) on line {}'
                    .format(d, t, ix + headerlines)
                )
                raise e
        dataframe['EXC_DATETIME'] = datetime
    # Replace -9999, -999, -99, -9 with np.nan
    dataframe = dataframe.replace([-9999, -999, -99, -9], np.nan)
    # Add some extra metadata to the dataframe
    dataframe.whp_exchange.column_units = column_units
    dataframe.whp_exchange.signature = signature
    dataframe.whp_exchange.file_type = file_type
    dataframe.whp_exchange.comments = comments
    return dataframe