def main(options, args):
    '''\
    %prog [options] <citationFile> <patentidfile> <TrendFile1> <TrendFile2>...

    For every trend file: run calculate_connectivity on it, convert the
    result with connectivityTrend2Patent, and write the outcome with
    writefeature to a file under ./feature.
    '''
    # options -- not read by this function.
    # args    -- args[0] is the citation file, args[1] the patent id list file
    #            and args[2:] the trend files to process.
    # Returns 0 after the last trend file has been handled.
    #
    # NOTE(review): the first docstring line looks like an optparse usage
    # template (%prog) -- confirm whether the caller passes the docstring on
    # as usage text before extending it further.
    # NOTE(review): `directory` is not assigned in this function; presumably
    # a module-level output path -- confirm.
    if not os.path.exists(directory):
        os.makedirs(directory)

    citationFile = args[0]
    patentidfile = args[1]
    trendFiles = args[2:]

    Citation = load_dictionary(citationFile)
    patentidlist = readPatentIDList(patentidfile)

    # Every output path below starts with ./feature, so create it once up
    # front instead of re-checking on each iteration.
    if not os.path.exists('./feature'):
        os.makedirs('./feature')

    for trendFile in trendFiles:
        logging.info(os.path.basename(trendFile))
        Trend = load_trendFile(trendFile)
        # Year / size filters (filter_By_Year, filter_By_Number) used to be
        # applied here and are currently disabled.
        logging.info('number of qualified trend ' + str(len(Trend)))

        trendconnectivity = calculate_connectivity(Trend, Citation)
        testDic(trendconnectivity, 'trendConnectivity', 1)
        # Per the original comment these calls print the average value of
        # the selected attribute; the helper is defined elsewhere.
        averageTrendAttribute(trendconnectivity, 1, 'connectivity')
        averageTrendAttribute(trendconnectivity, 4, 'InOverAllCitation')
        patentConnectivity = connectivityTrend2Patent(
            Trend, trendconnectivity, patentidlist)

        # Name of the feature file: a known keyword in the path wins (first
        # match in this order); otherwise fall back to the file's own stem.
        for keyword in ('LDA_Kmeans', 'Topic', 'TFIDF_Kmeans', 'Word'):
            if keyword in trendFile:
                name = keyword
                break
        else:
            # basename/splitext remove the directory and the extension.
            # str.strip('./') / str.rstrip('.csv') strip *character sets*,
            # which also ate trailing 'c', 's', 'v' letters of the stem and
            # left directory components inside the name.
            stem = os.path.splitext(os.path.basename(trendFile))[0]
            # NOTE(review): the 5-character cut is kept from the original;
            # presumably it drops a trailing 'Trend' -- confirm.
            name = stem[:-5]

        featurefile = './feature/connectivity_feature_' + name + '.txt'
        # NOTE(review): this path does not depend on the trend file, so
        # presumably each iteration overwrites the previous dump -- confirm
        # that this is intended.
        store_dictionary(trendconnectivity,
                         './feature/trend_connectivity_dic.csv')
        writefeature(patentConnectivity, patentidlist, featurefile, 5, '0')
        # Parenthesised form prints the same text under Python 2 and also
        # parses under Python 3.
        print('\n')
    return 0
def _connectivity_feature_name(trendFile):
    """Return the label embedded in the connectivity feature file name.

    A known keyword contained in the path wins, checked in this order:
    'LDA_Kmeans', 'Topic', 'TFIDF_Kmeans', 'Word'.  Otherwise the label is
    the file name without directory and extension, minus its last five
    characters.
    """
    for keyword in ('LDA_Kmeans', 'Topic', 'TFIDF_Kmeans', 'Word'):
        if keyword in trendFile:
            return keyword
    # os.path handles the directory and the extension properly; the earlier
    # strip('./') / rstrip('.csv') calls removed character sets, so stems
    # ending in '.', 'c', 's' or 'v' were truncated and directory parts
    # leaked into the output file name.
    stem, _extension = os.path.splitext(os.path.basename(trendFile))
    # NOTE(review): the 5-character cut is inherited; presumably it removes
    # a trailing 'Trend' -- confirm.
    return stem[:-5]


def main(options, args):
    '''\
    %prog [options] <citationFile> <patentidfile> <TrendFile1> <TrendFile2>...

    Process each trend file in turn: calculate_connectivity, then
    connectivityTrend2Patent, then writefeature into ./feature.
    '''
    # options -- unused here.
    # args    -- [citation file, patent id list file, trend file, ...].
    # Returns 0 when all trend files have been processed.
    #
    # NOTE(review): the docstring's first line looks like an optparse usage
    # template (%prog); check how the caller uses it before rewording.
    # NOTE(review): `directory` comes from outside this function, presumably
    # module level -- confirm.
    if not os.path.exists(directory):
        os.makedirs(directory)

    citationFile = args[0]
    patentidfile = args[1]
    trendFiles = args[2:]

    Citation = load_dictionary(citationFile)
    patentidlist = readPatentIDList(patentidfile)

    # Loop-invariant: all outputs are written below ./feature.
    if not os.path.exists('./feature'):
        os.makedirs('./feature')

    for trendFile in trendFiles:
        logging.info(os.path.basename(trendFile))
        Trend = load_trendFile(trendFile)
        # filter_By_Year / filter_By_Number were applied here in the past
        # and are currently switched off.
        logging.info('number of qualified trend ' + str(len(Trend)))

        trendconnectivity = calculate_connectivity(Trend, Citation)
        testDic(trendconnectivity, 'trendConnectivity', 1)
        # Original comment: "print out the average value of connectivity".
        averageTrendAttribute(trendconnectivity, 1, 'connectivity')
        averageTrendAttribute(trendconnectivity, 4, 'InOverAllCitation')
        patentConnectivity = connectivityTrend2Patent(
            Trend, trendconnectivity, patentidlist)

        name = _connectivity_feature_name(trendFile)
        featurefile = './feature/connectivity_feature_' + name + '.txt'
        # NOTE(review): fixed path, independent of the trend file --
        # presumably overwritten on every iteration; confirm intent.
        store_dictionary(trendconnectivity,
                         './feature/trend_connectivity_dic.csv')
        writefeature(patentConnectivity, patentidlist, featurefile, 5, '0')
        # Same output as the Python 2 statement form, and valid on Python 3.
        print('\n')
    return 0
def main(options, args):
    """\
    %prog [options]<renewfile> <datefile> <citationFile> <patentidfile> <TrnedFile1> <TrnedFile2>...

    Smoke run of the connectivity step on a small hard-coded fixture.
    Neither ``options`` nor ``args`` is read, despite the usage line above.
    The function ends without a return statement.
    """
    # An earlier, file-driven variant of this test (random renew/date values
    # generated over a patent id list read from disk) has been disabled in
    # favour of the fixed fixture below.

    # ---- fixture -------------------------------------------------------
    patentidlist = list(map(str, range(10)))
    # Renew and Date are only referenced by the disabled checks listed
    # further down; they are kept so those checks can be re-enabled.
    Renew = {
        "1": "2", "0": "3", "3": "1", "2": "2", "5": "0",
        "4": "3", "7": "2", "6": "1", "9": "3", "8": "3",
    }
    Date = {
        "1": 123, "0": 401, "3": 542, "2": 423, "5": 338,
        "4": 492, "7": 484, "6": 356, "9": 451, "8": 242,
    }
    Trend = {
        "0": ["0", "1", "2", "3", "4"],
        "1": ["5", "6", "7", "8", "9"],
        "2": ["3", "7", "9"],
    }
    Citation = {
        "1": ["2", "3", "6", "8"],
        "2": ["5", "6", "9"],
        "3": ["9"],
        "4": ["8"],
        "5": [],
        "7": ["11"],
        "8": ["7"],
        "9": [],
    }
    trendName = "test"

    # Disabled checks that would consume Date / Renew:
    #   Position = Position_feature(Trend, Date, patentidlist)
    #   writefeature(Position, patentidlist,
    #                './feature/position_' + trendName + '.csv', 3, '0')
    #   RenewFeature = Renew_feature(Trend, Renew, patentidlist)
    #   writefeature(RenewFeature, patentidlist,
    #                './feature/renew_percenage_' + trendName + '.csv', 10, '0')

    # ---- trend connectivity --------------------------------------------
    logging.info("***trend connectivity***")
    trend_scores = calculate_connectivity(Trend, Citation)
    testDic(trend_scores, "trendConnectivity", 1)

    # Original comment: "print out the average value of connectivity".
    for attr, title in ((1, "connectivity"), (4, "InOverAllCitation")):
        averageTrendAttribute(trend_scores, attr, title)

    patent_scores = connectivityTrend2Patent(Trend, trend_scores, patentidlist)
    testDic(patent_scores, "patentConnectivity", 1)

    feature_path = "./feature/connectivity_" + trendName + ".txt"
    dump_path = "./feature/trend_connectivity_" + trendName + ".csv"
    store_dictionary(trend_scores, dump_path)
    writefeature(patent_scores, patentidlist, feature_path, 5, "0")

    # Bare `print` kept as written: as a Python 2 statement it emits a blank
    # line; as a Python 3 expression it is a no-op.
    print
def main(options, args):
    '''\
    %prog [options]<renewfile> <datefile> <citationFile> <patentidfile> <TrnedFile1> <TrnedFile2>...

    Exercise the connectivity step against an in-code fixture.  The
    ``options`` and ``args`` parameters are accepted but never read, and
    the function falls off the end without a return statement.
    '''
    # A file-based version of this test (patent ids loaded from disk, renew
    # and date values drawn with randint) was commented out previously; the
    # literals below replace it.

    # Fixture data.  Renew and Date feed only the disabled Position_feature
    # and Renew_feature checks noted below.
    patentidlist = [str(number) for number in range(0, 10)]
    Renew = {'1': '2', '0': '3', '3': '1', '2': '2', '5': '0',
             '4': '3', '7': '2', '6': '1', '9': '3', '8': '3'}
    Date = {'1': 123, '0': 401, '3': 542, '2': 423, '5': 338,
            '4': 492, '7': 484, '6': 356, '9': 451, '8': 242}
    Trend = {'0': ['0', '1', '2', '3', '4'],
             '1': ['5', '6', '7', '8', '9'],
             '2': ['3', '7', '9']}
    trendName = 'test'
    Citation = {'1': ['2', '3', '6', '8'],
                '2': ['5', '6', '9'],
                '3': ['9'],
                '4': ['8'],
                '5': [],
                '7': ['11'],
                '8': ['7'],
                '9': []}

    # Disabled:
    #   Position = Position_feature(Trend, Date, patentidlist)
    #   writefeature(Position, patentidlist,
    #                './feature/position_' + trendName + '.csv', 3, '0')
    #   RenewFeature = Renew_feature(Trend, Renew, patentidlist)
    #   writefeature(RenewFeature, patentidlist,
    #                './feature/renew_percenage_' + trendName + '.csv', 10, '0')

    # Output locations, both derived from the fixture name.
    connectivity_txt = './feature/connectivity_' + trendName + '.txt'
    connectivity_csv = './feature/trend_connectivity_' + trendName + '.csv'

    # Trend connectivity.
    logging.info('***trend connectivity***')
    per_trend = calculate_connectivity(Trend, Citation)
    testDic(per_trend, 'trendConnectivity', 1)

    # Original comment: "print out the average value of connectivity".
    reports = [(1, 'connectivity'), (4, 'InOverAllCitation')]
    for attr, caption in reports:
        averageTrendAttribute(per_trend, attr, caption)

    per_patent = connectivityTrend2Patent(Trend, per_trend, patentidlist)
    testDic(per_patent, 'patentConnectivity', 1)

    store_dictionary(per_trend, connectivity_csv)
    writefeature(per_patent, patentidlist, connectivity_txt, 5, '0')

    # Left as a bare `print`: a blank line when run as a Python 2 statement,
    # a no-op expression under Python 3.
    print
def main(options, args):
    '''\
    %prog [options] <renewfile> <datefile> <citationFile> <patentidfile> <TrendFile1> <TrendFile2>...

    For each trend file, write the connectivity, slope, position and renew
    feature files into a sub-directory of `directory` named after the trend
    file.
    '''
    # options -- only options.windowSize is read; it is passed on to
    #            calculateSlope.
    # args    -- args[0] renew file, args[1] date file, args[2] citation
    #            file, args[3] patent id list file, args[4:] trend files.
    # Returns 0 after all trend files have been processed.
    #
    # NOTE(review): the docstring's first line looks like an optparse usage
    # template (%prog) -- confirm how the caller uses it.
    # NOTE(review): `directory` is not assigned here; presumably a
    # module-level output path -- confirm.
    if not os.path.exists(directory):
        os.makedirs(directory)

    # input
    windowSize = options.windowSize
    renewfile = args[0]
    datefile = args[1]
    citationFile = args[2]
    patentidfile = args[3]
    trendFiles = args[4:]

    logging.info('reading Citation,id list, renew, Date information')
    Citation = load_dictionary(citationFile)
    patentidlist = readPatentIDList(patentidfile)
    RenewInfo = read_renew_file(renewfile)
    Date = load_dateFile(datefile)
    logging.info('finished reading')

    for trendFile in trendFiles:
        # splitext removes the actual extension, whatever its length; the
        # previous [:-4] slice silently assumed a three-letter one.
        trendName = os.path.splitext(os.path.basename(trendFile))[0]
        subdirectory = os.path.join(directory, trendName)
        if not os.path.exists(subdirectory):
            os.makedirs(subdirectory)

        logging.info('----------------------------------')
        logging.info(os.path.basename(trendFile))
        Trend = load_trendFile(trendFile)
        # filter_By_Year / filter_By_Number used to be applied here and are
        # currently disabled.
        logging.info('number of qualified trend ' + str(len(Trend)))

        # -----------------------------
        # trend connectivity
        logging.info('***trend connectivity***')
        trendconnectivity = calculate_connectivity(Trend, Citation)
        testDic(trendconnectivity, 'trendConnectivity', 1)
        # Original comment: "print out the average value of connectivity".
        averageTrendAttribute(trendconnectivity, 1, 'connectivity')
        averageTrendAttribute(trendconnectivity, 4, 'InOverAllCitation')
        patentConnectivity = connectivityTrend2Patent(
            Trend, trendconnectivity, patentidlist)
        testDic(patentConnectivity, 'patentConnectivity', 1)
        connectivityFeatureFile = os.path.join(subdirectory, 'connectivity.txt')
        store_dictionary(
            trendconnectivity,
            os.path.join(subdirectory, 'trend_connectivity_' + trendName + '.csv'))
        writefeature(patentConnectivity, patentidlist,
                     connectivityFeatureFile, 5, '0')
        # print('') emits the same blank line as the bare Python 2 `print`
        # statement and also works on Python 3.
        print('')

        # -------------------------------
        # the trendslope feature
        logging.info('***trend slope***')
        # Number of patents over all trends; a patent listed in several
        # trends is counted once per listing.
        totall = sum(len(patentlist) for patentlist in Trend.values())
        logging.info('number of toall patents,including duplicates: ' + str(totall))
        TrendShpes = {label: drawTrend(patentlist, Date)
                      for label, patentlist in Trend.items()}
        # assume each patent in one trend
        Slope = calculateSlope(Trend, TrendShpes, Date, windowSize)
        testDic(Slope, 'Slope', 1)
        slopeFeaturefile = os.path.join(subdirectory, 'slope.txt')
        # Unlike the other stages this call relies on writefeature's
        # defaults for its trailing arguments.
        writefeature(Slope, patentidlist, slopeFeaturefile)
        print('')

        # ---------------------------------
        # the position feature for each patent
        logging.info('***patent position***')
        Position = Position_feature(Trend, Date, patentidlist)
        testDic(Position, 'Position', 1)
        positionfile = os.path.join(subdirectory, 'position.txt')
        writefeature(Position, patentidlist, positionfile, 3, '0')
        print('')

        # -----------------------------
        # the renew feature for each patent
        logging.info('***trend renew feature***')
        renewfeaturefile = os.path.join(subdirectory, 'renew.txt')
        RenewFeature = Renew_feature(Trend, RenewInfo, patentidlist)
        testDic(RenewFeature, 'RenewFeature', 1)
        writefeature(RenewFeature, patentidlist, renewfeaturefile, 10, '0')
    return 0
def _write_connectivity_feature(Trend, Citation, patentidlist, subdirectory, trendName):
    """Run the connectivity stage and write its two output files."""
    logging.info('***trend connectivity***')
    trendconnectivity = calculate_connectivity(Trend, Citation)
    testDic(trendconnectivity, 'trendConnectivity', 1)
    # Original comment: "print out the average value of connectivity".
    averageTrendAttribute(trendconnectivity, 1, 'connectivity')
    averageTrendAttribute(trendconnectivity, 4, 'InOverAllCitation')
    patentConnectivity = connectivityTrend2Patent(
        Trend, trendconnectivity, patentidlist)
    testDic(patentConnectivity, 'patentConnectivity', 1)
    store_dictionary(
        trendconnectivity,
        os.path.join(subdirectory, 'trend_connectivity_' + trendName + '.csv'))
    writefeature(patentConnectivity, patentidlist,
                 os.path.join(subdirectory, 'connectivity.txt'), 5, '0')


def _write_slope_feature(Trend, Date, windowSize, patentidlist, subdirectory):
    """Run the trend slope stage and write slope.txt."""
    logging.info('***trend slope***')
    # Patents listed in several trends are counted once per listing.
    totall = sum(len(patentlist) for patentlist in Trend.values())
    logging.info('number of toall patents,including duplicates: ' + str(totall))
    TrendShpes = {label: drawTrend(patentlist, Date)
                  for label, patentlist in Trend.items()}
    # assume each patent in one trend
    Slope = calculateSlope(Trend, TrendShpes, Date, windowSize)
    testDic(Slope, 'Slope', 1)
    # This call relies on writefeature's defaults for its trailing arguments.
    writefeature(Slope, patentidlist, os.path.join(subdirectory, 'slope.txt'))


def _write_position_feature(Trend, Date, patentidlist, subdirectory):
    """Run the patent position stage and write position.txt."""
    logging.info('***patent position***')
    Position = Position_feature(Trend, Date, patentidlist)
    testDic(Position, 'Position', 1)
    writefeature(Position, patentidlist,
                 os.path.join(subdirectory, 'position.txt'), 3, '0')


def _write_renew_feature(Trend, RenewInfo, patentidlist, subdirectory):
    """Run the renew stage and write renew.txt."""
    logging.info('***trend renew feature***')
    RenewFeature = Renew_feature(Trend, RenewInfo, patentidlist)
    testDic(RenewFeature, 'RenewFeature', 1)
    writefeature(RenewFeature, patentidlist,
                 os.path.join(subdirectory, 'renew.txt'), 10, '0')


def main(options, args):
    '''\
    %prog [options] <renewfile> <datefile> <citationFile> <patentidfile> <TrendFile1> <TrendFile2>...

    For each trend file, run the connectivity, slope, position and renew
    stages and write their outputs into a sub-directory of `directory`
    named after the trend file.
    '''
    # options -- only options.windowSize is read (forwarded to the slope
    #            stage).
    # args    -- [renew file, date file, citation file, patent id list
    #            file, trend file, ...].
    # Returns 0 after all trend files have been processed.
    #
    # NOTE(review): the docstring's first line looks like an optparse usage
    # template (%prog) -- confirm how the caller uses it.
    # NOTE(review): `directory` is defined outside this function,
    # presumably at module level -- confirm.
    if not os.path.exists(directory):
        os.makedirs(directory)

    # input
    windowSize = options.windowSize
    renewfile = args[0]
    datefile = args[1]
    citationFile = args[2]
    patentidfile = args[3]
    trendFiles = args[4:]

    logging.info('reading Citation,id list, renew, Date information')
    Citation = load_dictionary(citationFile)
    patentidlist = readPatentIDList(patentidfile)
    RenewInfo = read_renew_file(renewfile)
    Date = load_dateFile(datefile)
    logging.info('finished reading')

    for trendFile in trendFiles:
        # splitext strips the real extension instead of assuming that it is
        # exactly four characters long, as the former [:-4] slice did.
        trendName = os.path.splitext(os.path.basename(trendFile))[0]
        subdirectory = os.path.join(directory, trendName)
        if not os.path.exists(subdirectory):
            os.makedirs(subdirectory)

        logging.info('----------------------------------')
        logging.info(os.path.basename(trendFile))
        Trend = load_trendFile(trendFile)
        # filter_By_Year / filter_By_Number were applied here in the past
        # and are currently disabled.
        logging.info('number of qualified trend ' + str(len(Trend)))

        # Stage order and the blank line printed between stages are kept
        # from the original; print('') matches the bare Python 2 `print`
        # statement and also works on Python 3.
        _write_connectivity_feature(
            Trend, Citation, patentidlist, subdirectory, trendName)
        print('')
        _write_slope_feature(Trend, Date, windowSize, patentidlist, subdirectory)
        print('')
        _write_position_feature(Trend, Date, patentidlist, subdirectory)
        print('')
        _write_renew_feature(Trend, RenewInfo, patentidlist, subdirectory)
    return 0