def _rows_to_docs(rows):
    """Map each (wsname, summary, categoryid, tags, Description) row to text.

    The key is the name stripped, lower-cased and with spaces turned into
    hyphens; the value is the four text columns joined by single spaces
    with newlines replaced by spaces.
    """
    docs = {}
    for (wsname, summary, categoryid, tags, description) in rows:
        key = wsname.strip().lower().replace(' ', '-')
        text = summary + ' ' + categoryid + ' ' + tags + ' ' + description
        docs[key] = text.replace('\n', ' ')
    return docs


def get_LDA_doc():
    """Write one training document per line of ``webservice.txt``.

    Rows are loaded from ``dbo.ws`` and ``dbo.mp``.  For every line of
    ``webservice.txt`` the first space-separated token is looked up, first
    among the ``dbo.ws`` entries and then among the ``dbo.mp`` entries.
    The matching text (or, when neither table knows the token, the token
    itself) is written to ``LDA_train.txt`` followed by ``'\\r\\n'``.
    Finally the number of processed lines is printed.
    """
    # NOTE(review): connection credentials are hard-coded; consider moving
    # them to configuration.
    ms = MSSSQL(host="172.28.4.193", user="******", pwd="wy9756784750", db="pweb")
    ws = _rows_to_docs(ms.ExeQuery(
        "select wsname,summary,categoryid,tags,Description from dbo.ws"))
    # The original stored these rows in ``ws`` as well, which left ``mp``
    # permanently empty and the ``mp`` lookup below unreachable.  Keeping
    # the tables separate means a name present in both now resolves to the
    # ``dbo.ws`` text, matching the lookup order.
    mp = _rows_to_docs(ms.ExeQuery(
        "select wsname,summary,categoryid,tags,Description from dbo.mp"))

    cc = 0
    # Context managers close both files even if a write or lookup fails.
    with open('webservice.txt') as source:
        with open('LDA_train.txt', 'w') as target:
            for line in source:
                name = line.strip('\r\n').split(' ')[0]
                cc += 1
                if name in ws:
                    text = ws[name]
                elif name in mp:
                    text = mp[name]
                else:
                    text = name
                target.write(text + '\r\n')
    print(cc)
def test():
    """Print watchlist rows whose URL ends in a known name.

    For every ``(wsname, url, id, date)`` row of ``dbo.watchlist`` the last
    ``/``-separated segment of ``url`` is checked against the collection
    returned by ``write_api_mashupToTxt()``; matching rows are printed as
    ``wsname url``.
    """
    t = write_api_mashupToTxt()
    # NOTE(review): connection credentials are hard-coded; consider moving
    # them to configuration.
    ms = MSSSQL(host="172.28.4.193", user="******", pwd="wy9756784750", db="pweb")
    resList = ms.ExeQuery("select * from dbo.watchlist")
    # The third and fourth columns are unused; the underscore names avoid
    # shadowing the builtin ``id``.
    for (wsname, url, _row_id, _date) in resList:
        last_segment = url.split('/')[-1]
        if last_segment in t:
            # A single formatted argument prints the same under the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s' % (wsname, url))
def xiuzheng():
    """Write name replacements for mashups to ``name_replace_1.txt``.

    For every ``(wsname, APIhome)`` row of ``dbo.mp`` the second-to-last
    ``/``-separated segment of ``APIhome`` is checked against the
    collection returned by ``write_api_mashupToTxt()``.  On a match a line
    ``<normalised wsname> <segment> 1`` is written (terminated by
    ``'\\r\\n'``) and the segment is removed from the collection.  The
    collection size is printed before and after, followed by the remaining
    entries.
    """
    # The file is opened first, as in the original, but inside a context
    # manager so it is flushed and closed even when a later step fails.
    with open('name_replace_1.txt', 'w') as output:
        t = write_api_mashupToTxt()
        print(len(t))
        # NOTE(review): connection credentials are hard-coded; consider
        # moving them to configuration.
        ms = MSSSQL(host="172.28.4.193", user="******", pwd="wy9756784750", db="pweb")
        resList = ms.ExeQuery("select wsname,APIhome from dbo.mp")
        for (wsname, APIhome) in resList:
            parts = APIhome.strip().split('/')
            if len(parts) < 2:
                # No '/' in the value: there is no second-to-last segment,
                # so skip the row instead of raising IndexError.
                continue
            segment = parts[-2]
            if segment in t:
                normalised = wsname.lower().strip().replace(' ', '-')
                output.write(normalised + ' ' + segment + ' ' + '1' + '\r\n')
                t.remove(segment)
    print(len(t))
    print(t)
def get_user(min_count=50):
    """Return the names that occur in more than ``min_count`` watchlist rows.

    Every row of ``dbo.watchlist`` is unpacked as
    ``(wsname, url, id, date)`` and the occurrences of each ``wsname``
    value are counted.

    Args:
        min_count: Exclusive lower bound on the number of rows a name must
            have to be returned.  Defaults to 50, the value that was
            previously hard-coded.

    Returns:
        A list of the ``wsname`` values whose row count exceeds
        ``min_count``.
    """
    from collections import Counter

    # NOTE(review): connection credentials are hard-coded; consider moving
    # them to configuration.
    ms = MSSSQL(host="172.28.4.193", user="******", pwd="wy9756784750", db="pweb")
    resList = ms.ExeQuery("select * from dbo.watchlist")
    # Only the first column is counted; the underscore names avoid
    # shadowing the builtin ``id``.
    occurrences = Counter(
        wsname for (wsname, _url, _row_id, _date) in resList
    )
    return [name for name, seen in occurrences.items() if seen > min_count]
def count_api_mashup():
    """Return the normalised names from ``dbo.ws`` and ``dbo.mp``.

    Each ``wsname`` is stripped, has its spaces replaced by hyphens and is
    upper-cased.

    Returns:
        A 2-tuple ``(api_names, mashup_names)`` of lists, the first built
        from ``dbo.ws`` and the second from ``dbo.mp``, in query order.
    """
    db = MSSSQL(host="172.28.4.193", user="******", pwd="wy9756784750", db="pweb")

    def _normalise(rows):
        # Each row is a one-element tuple holding the name.
        return [
            raw_name.strip().replace(' ', '-').upper()
            for (raw_name, ) in rows
        ]

    api_names = _normalise(db.ExeQuery("select wsname from dbo.ws"))
    mashup_names = _normalise(db.ExeQuery("select wsname from dbo.mp"))
    return api_names, mashup_names
set_time = {} c = 0 for line in input3: s = line.strip('\r\n').split(' ') t = d + datetime.timedelta(seconds=int(s[2])) ut = (t-t_user[s[0]]).days if ut<0: ut = 0 ''' input5 = open('name_replace.txt') rep = {} for line in input5: s = line.strip('\r\n').split(' ') rep[s[1]] = s[0] ms = MSSSQL(host="172.28.37.29", user="******", pwd="wy9756784750", db="pweb") resList = ms.ExeQuery("select * from dbo.watchlist") c = 0 for wsname,url,id,date in resList: str_array = url.split('/') url = str_array[-1] url = url.replace('%2Fcomments', '') if rep.has_key(url): url = rep[url] t = transfer_time(date) if t_s.has_key(url): if t<t_s[url]: t_s[url] = t if t_user.has_key(wsname): if t<t_user[wsname]: t_user[wsname] = t