def main(options, args):
    '''\
    %prog [options] <citationFile> <patentidfile> <TrnedFile1> <TrnedFile2>...
    '''
    # Entry point of the connectivity feature extraction.
    #
    # For every trend file the trend connectivity is computed with
    # calculate_connectivity, mapped onto patents with
    # connectivityTrend2Patent and written to
    # ./feature/connectivity_feature_<name>.txt.
    #
    # options -- parsed command-line options; not read by this function.
    # args    -- positional arguments: args[0] is the citation file,
    #            args[1] the patent id file, args[2:] the trend files.
    #
    # Returns 0 once every trend file has been processed.  Indexing `args`
    # fails (IndexError for a list) when fewer than two positional
    # arguments are supplied.
    #
    # NOTE: `directory` is not defined in this function; it has to be
    # available at module level.
    if not os.path.exists(directory):
        os.makedirs(directory)

    citationFile = args[0]
    patentidfile = args[1]
    trendFiles = args[2:]

    Citation = load_dictionary(citationFile)
    patentidlist = readPatentIDList(patentidfile)

    for trendFile in trendFiles:
        logging.info(os.path.basename(trendFile))
        Trend = load_trendFile(trendFile)
        # Trend = filter_By_Year(Trend,Date,1981,1998)
        # Trend = filter_By_Number(Trend,100)
        logging.info('number of qualified trend ' + str(len(Trend)))

        trendconnectivity = calculate_connectivity(Trend, Citation)
        testDic(trendconnectivity, 'trendConnectivity', 1)
        # print out the average value of connectivity
        averageTrendAttribute(trendconnectivity, 1, 'connectivity')
        averageTrendAttribute(trendconnectivity, 4, 'InOverAllCitation')
        patentConnectivity = connectivityTrend2Patent(
            Trend, trendconnectivity, patentidlist)
        # testDic(patentConnectivity,'patentConnectivity')

        # the name of feature file
        # Fallback name: the file name without directories and without a
        # literal '.csv' suffix.  str.strip()/str.rstrip() take a *set of
        # characters*, so the former strip('./').rstrip('.csv') also removed
        # trailing 'c', 's', 'v' and '.' characters belonging to the real
        # name and kept directory components in the feature file name.
        stem = os.path.basename(trendFile)
        if stem.endswith('.csv'):
            stem = stem[:-len('.csv')]
        # The last five characters of the stem are dropped as before
        # (presumably a fixed-length suffix -- TODO confirm); keep the whole
        # stem when that would leave an empty name.
        name = stem[:-5] or stem

        if not os.path.exists('./feature'):
            os.makedirs('./feature')

        # A known method keyword anywhere in the path overrides the fallback
        # name; the first match in this order wins.
        for keyword in ('LDA_Kmeans', 'Topic', 'TFIDF_Kmeans', 'Word'):
            if keyword in trendFile:
                name = keyword
                break

        featurefile = './feature/connectivity_feature_' + name + '.txt'
        # NOTE(review): this path is the same for every trend file, so each
        # iteration overwrites the dictionary stored by the previous one.
        store_dictionary(trendconnectivity,
                         './feature/trend_connectivity_dic.csv')
        # The trailing 5 and '0' are passed through to writefeature
        # unchanged; their meaning is defined there.
        writefeature(patentConnectivity, patentidlist, featurefile, 5, '0')
        # Parenthesised so it parses on Python 2 and 3; on Python 2 this is
        # still the print statement and emits the same two newlines.
        print('\n')
    return 0
def main(options, args):
    '''\
    %prog [options] <citationFile> <patentidfile> <TrnedFile1> <TrnedFile2>...
    '''
    # Compute and store the connectivity feature for each trend file.
    #
    # options -- parsed command-line options; unused here.
    # args    -- positional arguments: citation file, patent id file, then
    #            any number of trend files.
    #
    # Each trend file yields ./feature/connectivity_feature_<name>.txt.
    # Returns 0.  Fewer than two positional arguments make the `args`
    # indexing fail (IndexError for a list).
    #
    # NOTE: `directory` is expected to be defined at module level; it is not
    # set anywhere in this function.
    if not os.path.exists(directory):
        os.makedirs(directory)

    citationFile, patentidfile = args[0], args[1]
    trendFiles = args[2:]

    Citation = load_dictionary(citationFile)
    patentidlist = readPatentIDList(patentidfile)

    # Checked in this order; the first keyword found in the path names the
    # feature file.
    known_methods = ('LDA_Kmeans', 'Topic', 'TFIDF_Kmeans', 'Word')

    for trendFile in trendFiles:
        logging.info(os.path.basename(trendFile))
        Trend = load_trendFile(trendFile)
        # Trend = filter_By_Year(Trend,Date,1981,1998)
        # Trend = filter_By_Number(Trend,100)
        logging.info('number of qualified trend ' + str(len(Trend)))

        trendconnectivity = calculate_connectivity(Trend, Citation)
        testDic(trendconnectivity, 'trendConnectivity', 1)
        # print out the average value of connectivity
        averageTrendAttribute(trendconnectivity, 1, 'connectivity')
        averageTrendAttribute(trendconnectivity, 4, 'InOverAllCitation')
        patentConnectivity = connectivityTrend2Patent(
            Trend, trendconnectivity, patentidlist)
        # testDic(patentConnectivity,'patentConnectivity')

        # the name of feature file
        # strip('./') / rstrip('.csv') remove *any* of the listed characters,
        # not the literal prefix/suffix, which corrupted names ending in
        # 'c', 's', 'v' or '.' and left directory parts in the name.  Use the
        # base name and cut only a real '.csv' suffix instead.
        base = os.path.basename(trendFile)
        if base.endswith('.csv'):
            base = base[:-4]
        # As before, the last five characters are cut off (presumably a
        # fixed-length suffix -- TODO confirm), unless nothing would remain.
        name = base[:-5] or base

        if not os.path.exists('./feature'):
            os.makedirs('./feature')

        for method in known_methods:
            if method in trendFile:
                name = method
                break

        featurefile = './feature/connectivity_feature_' + name + '.txt'
        # NOTE(review): fixed output path -- every trend file overwrites the
        # dictionary written for the previous one.
        store_dictionary(trendconnectivity,
                         './feature/trend_connectivity_dic.csv')
        # 5 and '0' are forwarded to writefeature as-is; see writefeature
        # for what they control.
        writefeature(patentConnectivity, patentidlist, featurefile, 5, '0')
        # Valid on Python 2 (print statement with a parenthesised string)
        # and Python 3; the output is unchanged.
        print('\n')
    return 0
def main(options, args):
    '''\
    %prog [options]<renewfile> <datefile> <citationFile> <patentidfile> <TrnedFile1> <TrnedFile2>...
    '''
    # Entry point: build four feature files per trend file.
    #
    # For every trend file a sub-directory <directory>/<trendName> is
    # created and filled with
    #   connectivity.txt, trend_connectivity_<trendName>.csv,
    #   slope.txt, position.txt and renew.txt.
    #
    # options -- parsed command-line options; only options.windowSize is
    #            read and handed to calculateSlope.
    # args    -- positional arguments: renew file, date file, citation file,
    #            patent id file, then any number of trend files.
    #
    # Returns 0 after all trend files are processed.  Indexing `args` fails
    # (IndexError for a list) with fewer than four positional arguments.
    #
    # NOTE: `directory` is not defined in this function; it has to be
    # available at module level.
    if not os.path.exists(directory):
        os.makedirs(directory)

    # input
    windowSize = options.windowSize
    renewfile = args[0]
    datefile = args[1]
    citationFile = args[2]
    patentidfile = args[3]
    trendFiles = args[4:]

    logging.info('reading Citation,id list, renew, Date information')
    Citation = load_dictionary(citationFile)
    patentidlist = readPatentIDList(patentidfile)
    RenewInfo = read_renew_file(renewfile)
    Date = load_dateFile(datefile)
    logging.info('finished reading')

    for trendFile in trendFiles:
        # splitext removes whatever extension is present; the former
        # basename[:-4] silently assumed a four-character one such as
        # '.csv' and mangled any other file name.
        trendName = os.path.splitext(os.path.basename(trendFile))[0]
        subdirectory = os.path.join(directory, trendName)
        if not os.path.exists(subdirectory):
            os.makedirs(subdirectory)

        logging.info('----------------------------------')
        logging.info(os.path.basename(trendFile))
        Trend = load_trendFile(trendFile)
        # Trend = filter_By_Year(Trend,Date,1981,1998)
        # Trend = filter_By_Number(Trend,100)
        logging.info('number of qualified trend ' + str(len(Trend)))

        # -----------------------------
        # trend connectivity
        logging.info('***trend connectivity***')
        trendconnectivity = calculate_connectivity(Trend, Citation)
        testDic(trendconnectivity, 'trendConnectivity', 1)
        # print out the average value of connectivity
        averageTrendAttribute(trendconnectivity, 1, 'connectivity')
        averageTrendAttribute(trendconnectivity, 4, 'InOverAllCitation')
        patentConnectivity = connectivityTrend2Patent(
            Trend, trendconnectivity, patentidlist)
        testDic(patentConnectivity, 'patentConnectivity', 1)
        # connectivityFeatureFile = './feature/connectivity_'+trendName+'.txt'
        connectivityFeatureFile = os.path.join(subdirectory,
                                               'connectivity.txt')
        # store_dictionary(trendconnectivity,'./feature/trend_connectivity_'+trendName+'.csv')
        store_dictionary(
            trendconnectivity,
            os.path.join(subdirectory,
                         'trend_connectivity_' + trendName + '.csv'))
        # The trailing numeric and '0' arguments of the writefeature calls
        # are forwarded unchanged; their meaning is defined by writefeature.
        writefeature(patentConnectivity, patentidlist,
                     connectivityFeatureFile, 5, '0')
        # Blank separator line; print('') emits the same single newline as
        # the bare Python 2 `print` and also parses on Python 3.
        print('')

        # -------------------------------
        # the trendslope feature
        # calculate the number of patents in all trend
        logging.info('***trend slope***')
        # values()/items() iterate a Python 2 dict exactly like
        # iteritems() and keep working on Python 3.
        totall = sum(len(patentlist) for patentlist in Trend.values())
        logging.info('number of toall patents,including duplicates: '
                     + str(totall))
        TrendShpes = {}
        for label, patentlist in Trend.items():
            TrendShpes[label] = drawTrend(patentlist, Date)
        # assume each patent in one trend
        Slope = calculateSlope(Trend, TrendShpes, Date, windowSize)
        testDic(Slope, 'Slope', 1)
        # slopeFeaturefile = './feature/slope_'+trendName+'.txt'
        slopeFeaturefile = os.path.join(subdirectory, 'slope.txt')
        writefeature(Slope, patentidlist, slopeFeaturefile)
        print('')

        # ---------------------------------
        # the position feature for each patent
        logging.info('***patent position***')
        Position = Position_feature(Trend, Date, patentidlist)
        testDic(Position, 'Position', 1)
        # featurefile = './feature/position_'+trendName+'.txt'
        positionfile = os.path.join(subdirectory, 'position.txt')
        writefeature(Position, patentidlist, positionfile, 3, '0')
        print('')

        # -----------------------------
        # the renew feature for each patent
        logging.info('***trend renew feature***')
        # renewfeaturefile = './feature/renew_percenage_'+trendName+'.txt'
        renewfeaturefile = os.path.join(subdirectory, 'renew.txt')
        RenewFeature = Renew_feature(Trend, RenewInfo, patentidlist)
        testDic(RenewFeature, 'RenewFeature', 1)
        writefeature(RenewFeature, patentidlist, renewfeaturefile, 10, '0')
    return 0
def main(options, args):
    '''\
    %prog [options]<renewfile> <datefile> <citationFile> <patentidfile> <TrnedFile1> <TrnedFile2>...
    '''
    # Compute the connectivity, slope, position and renew features for each
    # trend file and write them below <directory>/<trendName>/.
    #
    # options -- parsed command-line options; options.windowSize is the only
    #            attribute read (passed on to calculateSlope).
    # args    -- positional arguments in this order: renew file, date file,
    #            citation file, patent id file, trend files.
    #
    # Returns 0.  With fewer than four positional arguments the `args`
    # indexing fails (IndexError for a list).
    #
    # NOTE: `directory` is expected to exist at module level; this function
    # never assigns it.
    if not os.path.exists(directory):
        os.makedirs(directory)

    # input
    windowSize = options.windowSize
    renewfile, datefile, citationFile, patentidfile = (
        args[0], args[1], args[2], args[3])
    trendFiles = args[4:]

    logging.info('reading Citation,id list, renew, Date information')
    Citation = load_dictionary(citationFile)
    patentidlist = readPatentIDList(patentidfile)
    RenewInfo = read_renew_file(renewfile)
    Date = load_dateFile(datefile)
    logging.info('finished reading')

    for trendFile in trendFiles:
        baseName = os.path.basename(trendFile)
        # Drop the real extension instead of blindly cutting four
        # characters, which broke names without a '.csv'-length extension.
        trendName = os.path.splitext(baseName)[0]
        subdirectory = os.path.join(directory, trendName)
        if not os.path.exists(subdirectory):
            os.makedirs(subdirectory)

        logging.info('----------------------------------')
        logging.info(baseName)
        Trend = load_trendFile(trendFile)
        # Trend = filter_By_Year(Trend,Date,1981,1998)
        # Trend = filter_By_Number(Trend,100)
        logging.info('number of qualified trend ' + str(len(Trend)))

        # -----------------------------
        # trend connectivity
        logging.info('***trend connectivity***')
        trendconnectivity = calculate_connectivity(Trend, Citation)
        testDic(trendconnectivity, 'trendConnectivity', 1)
        # print out the average value of connectivity
        averageTrendAttribute(trendconnectivity, 1, 'connectivity')
        averageTrendAttribute(trendconnectivity, 4, 'InOverAllCitation')
        patentConnectivity = connectivityTrend2Patent(
            Trend, trendconnectivity, patentidlist)
        testDic(patentConnectivity, 'patentConnectivity', 1)
        # connectivityFeatureFile = './feature/connectivity_'+trendName+'.txt'
        connectivityFeatureFile = os.path.join(subdirectory,
                                               'connectivity.txt')
        # store_dictionary(trendconnectivity,'./feature/trend_connectivity_'+trendName+'.csv')
        connectivityDicFile = os.path.join(
            subdirectory, 'trend_connectivity_' + trendName + '.csv')
        store_dictionary(trendconnectivity, connectivityDicFile)
        # Extra positional arguments of writefeature (here 5 and '0') are
        # passed through untouched; writefeature defines what they mean.
        writefeature(patentConnectivity, patentidlist,
                     connectivityFeatureFile, 5, '0')
        # Same single newline as the bare Python 2 `print`, but also valid
        # Python 3 syntax.
        print('')

        # -------------------------------
        # the trendslope feature
        # calculate the number of patents in all trend
        logging.info('***trend slope***')
        # items()/values() behave like iteritems() for iteration on a
        # Python 2 dict and are also available on Python 3.
        totall = sum(len(patentlist) for patentlist in Trend.values())
        logging.info('number of toall patents,including duplicates: '
                     + str(totall))
        TrendShpes = {}
        for label, patentlist in Trend.items():
            TrendShpes[label] = drawTrend(patentlist, Date)
        # assume each patent in one trend
        Slope = calculateSlope(Trend, TrendShpes, Date, windowSize)
        testDic(Slope, 'Slope', 1)
        # slopeFeaturefile = './feature/slope_'+trendName+'.txt'
        slopeFeaturefile = os.path.join(subdirectory, 'slope.txt')
        writefeature(Slope, patentidlist, slopeFeaturefile)
        print('')

        # ---------------------------------
        # the position feature for each patent
        logging.info('***patent position***')
        Position = Position_feature(Trend, Date, patentidlist)
        testDic(Position, 'Position', 1)
        # featurefile = './feature/position_'+trendName+'.txt'
        positionfile = os.path.join(subdirectory, 'position.txt')
        writefeature(Position, patentidlist, positionfile, 3, '0')
        print('')

        # -----------------------------
        # the renew feature for each patent
        logging.info('***trend renew feature***')
        # renewfeaturefile = './feature/renew_percenage_'+trendName+'.txt'
        renewfeaturefile = os.path.join(subdirectory, 'renew.txt')
        RenewFeature = Renew_feature(Trend, RenewInfo, patentidlist)
        testDic(RenewFeature, 'RenewFeature', 1)
        writefeature(RenewFeature, patentidlist, renewfeaturefile, 10, '0')
    return 0