def add_features(self):
    """Run every feature-engineering step on ``self.df``.

    A ``Features.Features`` helper is created around ``self.df`` and each of
    its steps is invoked once, in a fixed order. Whatever the steps return is
    discarded; presumably they work by side effect on the wrapped frame
    (TODO confirm against the ``Features`` module).
    """
    builder = Features.Features(self.df)

    # The sequence is significant only insofar as the original code used it;
    # it is reproduced unchanged.
    step_names = (
        "add_bbands",
        "compute_side_min",
        "add_momantum",
        "add_volatility",
        "add_serial_correlation",
        "add_log_returns",
        "mov_average",
        "add_rsi",
        "add_srsi",
        "add_trending_signal",
    )
    for step_name in step_names:
        getattr(builder, step_name)()
def do_job(i, chunk, output_dir='/scratch/vstrobel/features32_small/'):
    """Extract the features of every trip in one chunk and store them as HDF5.

    The chunk is read from ``chunk_path`` (module-level), grouped by the
    ``Driver`` and ``Trip`` index levels, and each group is passed to
    ``feat.Features(trip, features).extract_all_features()``. The collected
    results are turned into a data frame and written under the key
    ``'table'``.

    Args:
        i: Index used to build the output file name ``features_<i>.h5``.
        chunk: File name of the input chunk, relative to ``chunk_path``.
        output_dir: Directory receiving the output file. Defaults to the
            location that was previously hard-coded.
    """
    file_name = "features_{}.h5".format(i)
    df = pd.read_hdf(path.join(chunk_path, chunk), key='table')

    # One feature record per (Driver, Trip) group; the group key is not needed.
    features_for_this_chunk = [
        feat.Features(trip, features).extract_all_features()
        for _, trip in df.groupby(level=['Driver', 'Trip'])
    ]

    # from_dict is a classmethod, so no throw-away empty frame is required.
    df_features_for_this_chunk = pd.DataFrame.from_dict(features_for_this_chunk)

    # Write the data frame containing the features to an HDF5 file.
    df_features_for_this_chunk.to_hdf(path.join(output_dir, file_name), key='table')
    print("Written to", file_name)
def get_xy(file_conllu, file_features, file_embedding=None):
    """Build the (X, Y) data set from a CoNLL-U file.

    All trees are loaded through ``ConstructAllTree`` (projectivised variant),
    an ``Oracle`` is run on every tree with a shared ``Features`` object, and
    the data set is then obtained from that object.

    :param file_conllu: path of the CoNLL-U file handed to ``ConstructAllTree``
    :param file_features: features description handed to ``Features``
    :param file_embedding: optional embedding file forwarded to
        ``Features.get_Data_Set``
    :return: ``(X, Y, labels_encoderX, labels_encoderY, all_tree)``
    """
    print("Chargement des arbres")
    tree_loader = ConstructAllTree(file_conllu, get_mcd(), True)
    all_tree = tree_loader.get_allTreeProjectiviser()
    print("Arbres charger : ", len(all_tree))

    print("Création du dataset")
    features = Features(file_features)
    for current_tree in all_tree:
        # Presumably the oracle records its transitions into ``features``
        # (TODO confirm in Oracle.run).
        Oracle(current_tree, features).run()

    print("Convertion du dataset")
    print("file_embedding : ", file_embedding)
    X, Y = features.get_Data_Set(file_embedding)
    encoder_x = features.get_label_encoderX()
    encoder_y = features.get_label_encoderY()

    print("X_train_shape", X.shape)
    print("Y_train_shape", Y.shape)
    return X, Y, encoder_x, encoder_y, all_tree
def ExtractAllFeatures(self, hours):
    """Compute all features for every question/answer pair.

    Steps, in order:
      1. sort the posts by creation time,
      2. compute the questioners' reputation,
      3. for each question, record how many questions its owner had asked
         before it (``F2_QuesAskedByQuestionaire``),
      4. compute the number of answers within ``hours`` and their scores,
      8. compute the comment counts for the highest-reputation answerer,
    then copy the feature attributes of every entry of
    ``self.QuestionAnswerPairs`` into a fresh ``Features.Feature``.

    Args:
        hours: Time window forwarded to ``FindNumAnswerAndTheirScoresInXHours``
            and ``NumCommentsInQAOfHighRepUser``.

    Returns:
        The ``featureList`` of a new ``Features.Features`` object, holding one
        ``Features.Feature`` per question/answer pair.
    """
    features = Features.Features()

    # 1. Sort the posts in ascending order of timestamp.
    allPosts = self.documentDict[self.POSTS]
    postDict, sortedKeys = self.SortByPostCreationTime(allPosts)

    # 2. Questioner's reputation.
    self.GetQuestionersReputation()

    # 3. Questions asked by the questioner before the current question.
    userDict = {}  # userID -> QA.User carrying the running question count
    for postKey in sortedKeys:  # posts sorted by creation time
        post = postDict[postKey]
        if post.PostTypeId != '1':  # '1' marks a question
            continue

        userID = post.OwnerUserId
        if userID in userDict:
            userDict[userID].questionAsked += 1
        else:
            # First question of this user: nothing asked before it.
            newUser = QA.User(userID)
            newUser.questionAsked = 0
            userDict[userID] = newUser

        # Attach the count to the question/answer pair of this post.
        self.QuestionAnswerPairs[post.Id].F2_QuesAskedByQuestionaire = (
            userDict[userID].questionAsked)

    # 4. Number of answers to questions within the window and sum of scores.
    self.FindNumAnswerAndTheirScoresInXHours(hours)

    # 8. Number of comments in Q/A of the highest-reputation user answering
    #    the current post.
    self.NumCommentsInQAOfHighRepUser(hours)

    copiedAttributes = (
        "F1_QuestionersReputation",
        "F2_QuesAskedByQuestionaire",
        "F3_NumAnswerToQuestionInXHours",
        "F4_SumScores",
        "F5_BestScoreAnswerLength",
        "F6_BestScoreNumComments",
        "F7_BestScoreTimeDiff",
        "F8_ReputedUserNumComments",
        "Y_Label_FrequentlyViewed",
    )
    for qaPairs in self.QuestionAnswerPairs.values():
        feature = Features.Feature()
        for attributeName in copiedAttributes:
            setattr(feature, attributeName, getattr(qaPairs, attributeName))
        features.featureList.append(feature)

    return features.featureList
def generateFeaturesLine(self, lineSegment, pDict):
    """Compute the feature dictionary of a single bisegment line.

    ``lineSegment`` must split on ``"@#@"`` into exactly six fields: two ids,
    the source segment, the target segment and the detected source/target
    languages (a different field count raises ``ValueError`` on unpacking).
    ``pDict`` must provide the keys ``"sourceLanguage"``,
    ``"targetLanguage"`` and ``"fileRe"``.

    Returns a dict mapping feature names to their values.
    """
    sLang = pDict["sourceLanguage"]
    dLang = pDict["targetLanguage"]

    (_first_id, _second_id, rawSource, rawTarget,
     detectedSLangs, detectedDLangs) = lineSegment.split("@#@")
    cleanSource = self.cleanSegment(rawSource)
    cleanTarget = self.cleanSegment(rawTarget)
    features = Features(cleanSource, cleanTarget, pDict["fileRe"])

    result = {}

    # Church Gale score
    result["cgscore"] = features.getCGSore()
    result["same"] = features.isEqual()

    # Capital letters
    result["hassourcecl"], result["hastargetcl"] = features.haveCL()
    result["caplettersworddif"] = features.difWordsCapitalLetters()
    result["onlycaplettersdif"] = features.difWholeWordsCapitalLetters()

    # Language related features
    result["langdif"] = features.getLangScore(
        detectedSLangs, detectedDLangs, sLang, dLang)

    # URL, TAG and EMAIL: presence on each side, then rounded similarity.
    item_features = (
        ("hassourceurl", "hastargeturl", "urlsim", "urlRe"),
        ("hassourcetag", "hastargettag", "tagsim", "tagRe"),
        ("hassourceemail", "hastargetemail", "emailsim", "emailRe"),
    )
    for sourceKey, targetKey, simKey, regexName in item_features:
        result[sourceKey], result[targetKey] = features.haveItem(regexName)
        result[simKey] = round(features.getItemSimilarity(regexName), 2)

    # NUMBER uses its own similarity function.
    result["hassourcenumber"], result["hastargetnumber"] = features.haveItem(
        "numberRe")
    result["numbersim"] = round(features.getNumberSimilarity("numberRe"), 2)

    # PUNCTUATION and similarity
    result["hassourcepunctuation"], result["hastargetpunctuation"] = (
        features.havePunctuation())
    result["punctsim"] = round(features.getPunctuationSimilarity(), 2)

    # Name entity detection and similarity (similarity is not rounded)
    result["hassourcenameentity"], result["hastargetnameentity"] = (
        features.haveNameEntity())
    result["nersimilarity"] = features.getNameEntitiesSimilarity()

    return result
def generateFeatures(self, pDict):
    """Read the bisegments from a file and compute the features of each one.

    The file ``pDict["segmentsLangFile"]`` is read as UTF-8. Every line must
    split on ``"@#@"`` into exactly six fields: two key parts, the source
    segment, the target segment and the detected source/target languages
    (any other field count raises ``ValueError`` on unpacking).

    Args:
        pDict: Parameter dictionary providing ``"sourceLanguage"``,
            ``"targetLanguage"``, ``"segmentsLangFile"`` and ``"fileRe"``.

    Returns:
        An ``OrderedDict`` mapping ``"<key1>-<key2>"`` to the feature
        dictionary of that bisegment, in file order.
    """
    sLang = pDict["sourceLanguage"]
    dLang = pDict["targetLanguage"]

    segmentFeaturesDict = collections.OrderedDict()

    # The context manager guarantees the file is closed, also on errors.
    with codecs.open(pDict["segmentsLangFile"], "r", "utf-8") as fs:
        for lineSegment in fs:
            featuresDict = {}
            lineSegment = lineSegment.rstrip()
            (dictKey1, dictKey2, sourceSegment, targetSegment,
             detectedSLangs, detectedDLangs) = lineSegment.split("@#@")
            dictKey = dictKey1 + "-" + dictKey2
            sourceSegment = self.cleanSegment(sourceSegment)
            targetSegment = self.cleanSegment(targetSegment)

            features = Features(sourceSegment, targetSegment, pDict["fileRe"])

            # --------- Features that are not used in ML ---------------
            featuresDict["sourcesegment"] = sourceSegment
            featuresDict["targetsegment"] = targetSegment
            featuresDict["nwordssource"], featuresDict[
                "nwordstarget"] = features.getNWords()

            # --------- Features used in ML ------------------
            # Church Gale score
            featuresDict["cgscore"] = features.getCGSore()
            featuresDict["same"] = features.isEqual()

            # Capital letters
            featuresDict["hassourcecl"], featuresDict[
                "hastargetcl"] = features.haveCL()
            featuresDict[
                "caplettersworddif"] = features.difWordsCapitalLetters()
            featuresDict[
                "onlycaplettersdif"] = features.difWholeWordsCapitalLetters()

            # Language related features
            featuresDict["langdif"] = features.getLangScore(
                detectedSLangs, detectedDLangs, sLang, dLang)

            # URL and similarity
            featuresDict["hassourceurl"], featuresDict[
                "hastargeturl"] = features.haveItem("urlRe")
            featuresDict["urlsim"] = round(
                features.getItemSimilarity("urlRe"), 2)

            # TAG and similarity
            featuresDict["hassourcetag"], featuresDict[
                "hastargettag"] = features.haveItem("tagRe")
            featuresDict["tagsim"] = round(
                features.getItemSimilarity("tagRe"), 2)

            # EMAIL and similarity
            featuresDict["hassourceemail"], featuresDict[
                "hastargetemail"] = features.haveItem("emailRe")
            featuresDict["emailsim"] = round(
                features.getItemSimilarity("emailRe"), 2)

            # NUMBER and similarity
            featuresDict["hassourcenumber"], featuresDict[
                "hastargetnumber"] = features.haveItem("numberRe")
            featuresDict["numbersim"] = round(
                features.getNumberSimilarity("numberRe"), 2)

            # PUNCTUATION and similarity
            featuresDict["hassourcepunctuation"], featuresDict[
                "hastargetpunctuation"] = features.havePunctuation()
            featuresDict["punctsim"] = round(
                features.getPunctuationSimilarity(), 2)

            # Name entity detection and similarity
            featuresDict["hassourcenameentity"], featuresDict[
                "hastargetnameentity"] = features.haveNameEntity()
            featuresDict["nersimilarity"] = (
                features.getNameEntitiesSimilarity())

            segmentFeaturesDict[dictKey] = featuresDict

    return segmentFeaturesDict
def main(filetrainConllu,
         filetestConllu,
         features_file,
         file_genarate,
         model_file=None):
    """Build the training set, obtain a model and parse the test corpus.

    The training data is built with ``get_data``. When ``model_file`` is
    ``None`` a model is trained with ``get_model`` and the process then exits;
    otherwise the model is loaded from ``model_file``, an ``Automate`` is run
    on every sentence of the test corpus, and a CoNLL-U file is generated.

    :param filetrainConllu: CoNLL-U file used to build the training set
    :param filetestConllu: CoNLL-U file whose sentences are parsed; also
        passed to ``TransformTreeConllu``
    :param features_file: features description passed to ``get_data`` and
        ``Features``
    :param file_genarate: currently unused (see the review note at the end)
    :param model_file: h5 model file to load; ``None`` means "train and exit"
    :return: None
    """
    print("Chargement de données ...")
    x_train, y_train, labels_encoderX, labels_encoderY, all_tree = get_data(
        features_file, filetrainConllu)

    all_tree_automate = []
    print("Train model ....")
    input_dim = x_train.shape[1]
    nb_class = y_train.shape[1]
    if model_file is None:
        model = get_model(x_train, y_train, nb_class, input_dim)
        # Training-only run: the process stops here, as in the original.
        exit()
    else:
        model = load_model(model_file)

    print(filetestConllu)
    obj_generateAlltree = ConstructAllTree(filetestConllu, get_mcd(), False)
    all_tree = obj_generateAlltree.get_allTree()

    features = Features(features_file)
    features.set_label_encoderX(labels_encoderX)
    features.set_label_encoderY(labels_encoderY)

    for sentence_id, tree in enumerate(all_tree):
        # The first vertex is skipped (presumably the artificial root --
        # TODO confirm in the tree class).
        liste_word = [vertice.get_word() for vertice in tree.get_vertices()[1:]]
        print("INDEX=", sentence_id, " ", len(liste_word))

        automate = Automate(model, features=features, sentence=liste_word)
        predicted_tree = automate.run("Data/embd_fr_50.vec")
        print("tree=", predicted_tree)
        all_tree_automate.append(predicted_tree)

    print("Generation du fichier conllu....")
    # NOTE(review): the trees read from the test file (``all_tree``) are
    # written out, while the predicted trees collected in
    # ``all_tree_automate`` are never used, and the output name is fixed to
    # "generate_data.txt" although ``file_genarate`` is a parameter. Both look
    # unintended; behaviour is left unchanged pending confirmation.
    TransformTreeConllu(all_tree, "generate_data.txt", filetestConllu)

    print("Scrip Evaluation ...")
def generateFeatures(self, line):
    """Compute the feature dictionary of one tab-separated bisegment line.

    Expected fields, in order: source segment, target segment, sentence
    alignment score, source perplexity, target perplexity and, only when the
    line has exactly six fields, a category (otherwise the category is the
    empty string). Fewer than five fields raises ``IndexError``.

    Returns a dict mapping feature names to their values.
    """
    components = line.split("\t")
    rawSource = components[0]
    rawTarget = components[1]
    saScore = components[2]
    perplexitySource = components[3]
    perplexityTarget = components[4]
    category = components[5] if len(components) == 6 else ""

    result = {"category": category}

    cleanSource = self.cleanSegment(rawSource)
    cleanTarget = self.cleanSegment(rawTarget)
    features = Features(cleanSource, cleanTarget, self.pDict["fileRe"])

    # The word counts are not used below; the call is kept as it was.
    _nWordsSource, _nWordsTarget = features.getNWords()

    # Sentence alignment score, already provided in the input line
    result["saScore"] = saScore

    # Language-model perplexities of the source and target segment
    result["perplexitySource"] = perplexitySource
    result["perplexityTarget"] = perplexityTarget

    # Gale-Church score
    result["cgscore"] = features.getCGSore()
    result["same"] = features.isEqual()

    # Capital letters
    result["hassourcecl"], result["hastargetcl"] = features.haveCL()
    result["caplettersworddif"] = features.difWordsCapitalLetters()
    result["onlycaplettersdif"] = features.difWholeWordsCapitalLetters()

    # URL, TAG and EMAIL: presence on each side, then similarity.
    item_features = (
        ("hassourceurl", "hastargeturl", "urlsim", "urlRe"),
        ("hassourcetag", "hastargettag", "tagsim", "tagRe"),
        ("hassourceemail", "hastargetemail", "emailsim", "emailRe"),
    )
    for sourceKey, targetKey, simKey, regexName in item_features:
        result[sourceKey], result[targetKey] = features.haveItem(regexName)
        result[simKey] = features.getItemSimilarity(regexName)

    # NUMBER uses its own similarity function.
    result["hassourcenumber"], result["hastargetnumber"] = features.haveItem(
        "numberRe")
    result["numbersim"] = features.getNumberSimilarity("numberRe")

    # PUNCTUATION and similarity
    result["hassourcepunctuation"], result["hastargetpunctuation"] = (
        features.havePunctuation())
    result["punctsim"] = features.getPunctuationSimilarity()

    # Name entity detection and similarity
    result["hassourcenameentity"], result["hastargetnameentity"] = (
        features.haveNameEntity())
    result["nersimilarity"] = features.getNameEntitiesSimilarity()

    return result
import pandas as pd
import numpy as np
import Features as feat
from os import path
import matplotlib.pyplot as plt

# Exploratory script: plot a histogram of the velocities of each trip found
# in the first chunk file.
# NOTE(review): the statement nesting below was reconstructed from a
# flattened source; confirm that plt.show() belongs inside the loop.
dfs = pd.read_hdf(path.join("..", "chunks_big", "dataframe_0.h5"), key='table')

df1 = pd.DataFrame()

# One group per (Driver, Trip) pair of the index.
for index, trip in dfs.groupby(level=['Driver', 'Trip']):
    features = feat.Features(trip, [])

    # NOTE(review): this was labelled "In km/h", but 2.2369 is the
    # m/s -> mph conversion factor (m/s -> km/h would be 3.6). Confirm the
    # unit returned by euclidean_helper() and the intended output unit.
    velocities = features.euclidean_helper() * 2.2369
    # Bins of width 5 covering 0 up to (but excluding) 100.
    plt.hist(velocities, bins=np.arange(0, 100, 5))
    plt.show()
filename='run.log', level=log_level) console = logging.StreamHandler() console.setFormatter( logging.Formatter('%(levelname)7s - %(name)-8s: %(message)s')) logging.getLogger('').addHandler(console) log = logging.getLogger('main') # ---------------------------------------------------------------- # Load various components, and configure the modules that control # the crawling process # corpus_table = CorpusTable.CorpusTable(args.dbdir) # Storage layer spider = HTTPClient.HTTPClient() # Retrieval code url_normaliser = Normalisation.URLNormaliser() # URL normaliser feature_extractor = Features.Features(url_normaliser, ['title', 'h1']) # Feature extractor # URL Fitness Function #url_rank_function = SimplicityURLRank.SimplicityURLRank() # Prefer simple URLs #url_rank_function = SampleURLRank.SampleURLRank() # Sample code url_rank_function = HumanReadableURLRank.HumanReadableURLRank( ) # Prefer human-readable URLs page_filters = [ # Filters for page rejection # FuzzyDuplicateFilter.FuzzyDuplicateFilter(corpus_table), # Fuzzy hash using ssdeep DuplicateFilter.DuplicateFilter(corpus_table), # Perfect duplicate checker MinimumLengthFilter.MinimumLengthFilter(100), # Min length MaximumLengthFilter.MaximumLengthFilter(800000), # Max length URLCountFilter.URLCountFilter(0, 1000), # URL count MetadataRegexpFilter.MetadataRegexpFilter( 'content_type', 'text\/(x?html|plain)'), # Content type ] url_filters = [ # Filters for URL rejection
import numpy as np
import pandas as pd
from skimage import io, color, img_as_ubyte
from skimage.feature import greycomatrix, greycoprops
from sklearn.metrics.cluster import entropy
import skimage
import os
from Features import *
from camera import *

# Interactive script: optionally capture images for a new class and then
# generate the features CSV. The prompts are in Portuguese ("Do you want to
# add a new class?: y/n" and "Enter the name of the new class:").
print(" Gerarando nova classe!")
controler = input (" Deseja adicionar uma nova classe?: y/n ")
if controler == 'y':
    nome= input("Insira o nome da nova classe: ")
    # camera(nome).save_img() presumably captures and stores the images of
    # the new class -- TODO confirm in the camera module.
    nova_classe= camera(nome)
    nova_classe.save_img()
    # NOTE(review): nesting reconstructed from a flattened source; confirm
    # whether the CSV generation belongs inside this branch or should run
    # unconditionally.
    features = Features()
    features.gerar_csv()
def ExtractFeature(row):
    """Compute the average path length of one graph and upsert it in the DB.

    A ``Features.Features`` object is built from ``row.Path``; its
    ``AveragePathLength()`` is stored in the ``avgpathlength`` column of the
    ``InstancesFeatures`` table, inserting a new record (with a fresh UUID as
    ``Id``) when no record exists for ``row.Id`` and updating it otherwise.

    Only the average path length is persisted here. The original function
    also carried commented-out variants of the same select/insert-or-update
    pattern for other feature groups (nodes/edges/density, degree,
    betweenness, closeness, eigenvector centrality, eccentricity, clustering,
    weighted clustering, adjacency and Laplacian spectra, Wiener index,
    degeneracy, girth, connected components/rank/corank, maximal clique size,
    Szeged index, spectral bipartivity); that dead code has been removed and
    can be recovered from version control.

    Args:
        row: Record exposing ``Path`` (graph file passed to ``Features``),
            ``Id`` (used as ``GraphID``) and ``Name`` (printed on success).
    """
    instanceFeature = Features.Features(row.Path)

    # Raw string: "\S" is an invalid escape sequence in a normal literal; the
    # runtime value of the connection string is unchanged.
    conn2 = pyodbc.connect(
        r"Driver={SQL Server};Server=.\SQLEXPRESS;Trusted_Connection=yes;database=GraphColoringPortfolio"
    )
    try:
        cursor2 = conn2.cursor()

        AveragePathLength = instanceFeature.AveragePathLength()

        cursor2.execute(
            "SELECT GraphID FROM InstancesFeatures WHERE GraphID=?", row.Id)
        existingInstance = cursor2.fetchone()

        if existingInstance is None:
            pk = uuid.uuid4()
            cursor2.execute(
                "insert into InstancesFeatures(Id, GraphID, avgpathlength) values (?, ?, ?)",
                str(pk), row.Id, AveragePathLength)
        else:
            cursor2.execute(
                "UPDATE InstancesFeatures SET avgpathlength=? where GraphID=?",
                AveragePathLength, existingInstance.GraphID)
        conn2.commit()
    finally:
        # Previously the connection was never closed, leaking one connection
        # per processed row.
        conn2.close()

    print(row.Name, " processed.")