Example #1
# Imports based on the project layout shown in Example #6; createFrequencyModel
# and DATA_FOLDER come from project modules that are not shown in these examples.
import os

from src.commons import wordsImportance
from src.kgmanagement import getEntitiesPropertiesValue


def kgToCVSFile():
    # Extract the entities and their property values of each knowledge-graph
    # file into a CSV file.
    kgFiles = [
        "RAP-MSU_2019-08-29.ttl", "all.locus_brief_info.7.ttl",
        "agrold.oryza_sativa.ttl", "agrold.oryza_sativa_xrefs.ttl"
    ]
    for kgfile in kgFiles:
        getEntitiesPropertiesValue(kgfile)
    def test_getEnittiesPropertiesValue(self):
        """
        Our Knowledge dataset:
        1. oryzabase.ttl
        2. OryzabaseGeneList.ttl 
       
        This test is suppose to create the CSV file of the entry 
        file with colunms representing the properties of     interest 
        of each entity of the KB.
        fileName = "oryzabase_test.ttl"
        getEntitiesPropertiesValue(fileName)
        """

        fileName = "OryzabaseGeneList.ttl"
        getEntitiesPropertiesValue(fileName)
    def test_createFrequnecyModel(self):
        listOfKBCSVFile = []
        listOfKBCSVFileFolder = []
        listOfFqModel = []
        listOfFqModelFolder = []
        listOfAttributs = ['description', None]
        for attrib in listOfAttributs:
            folder, fileName = getEntitiesPropertiesValue(
                "oryzabase_testold.ttl", attrib, "Datasets")
            print("Dataset CSV file name ", fileName)
            print("Dataset CSV file folder ", folder)
            listOfKBCSVFile.append(fileName)
            listOfKBCSVFileFolder.append(folder)

            listOfFrequencies = ["tfidf"]
            #['tf', 'idf', 'tfidf']
            for freq in listOfFrequencies:
                fqModelfolder, fqModel = createFrequencyModel(
                    fileName, attrib, None, "row", "KB", freq, folder)
                print("Model file name ", fqModel)
                print("Model file folder ", folder)
                listOfFqModel.append(fqModel)
                listOfFqModelFolder.append(fqModelfolder)

        # Note: attrib and listOfFrequencies keep the values from the last
        # iteration of the loops above and are reused here.
        for kbcsvFile in range(len(listOfKBCSVFile)):
            for fqmodel in range(len(listOfFqModel)):
                for freq in listOfFrequencies:
                    filePath = os.path.join(os.path.join(
                        DATA_FOLDER, listOfKBCSVFileFolder[kbcsvFile]), listOfKBCSVFile[kbcsvFile])
                    if freq == "tfidf" and os.path.getsize(filePath) > 0:
                        wordsImpFileFolder, wordsImpFile = wordsImportance(
                            listOfFqModel[fqmodel], freq, listOfKBCSVFile[kbcsvFile], attrib, None, "row", listOfFqModelFolder[fqmodel], listOfKBCSVFileFolder[kbcsvFile])
                        print("Words importance file name ", wordsImpFile)
                        print("Words importance file folder ", wordsImpFileFolder)
Example #4
    def test_createFrequnecyModel(self):
        getEntitiesPropertiesValue(
            "oryzabase_ground.ttl", None, "Grounds", "Outputs")
        print("End ground")
        # The exit() below stops the test here; the remaining code is kept for
        # reference but is never reached.
        exit()
        listOfKBCSVFile = []
        listOfKBCSVFileFolder = []

        listOfFqModel = []
        listOfFqModelFolder = []

        listOfWordsImportance = []
        listOfWordsImportanceFolder = []
        listOfAttributs = ['description', None]
        for attrib in listOfAttributs:
            for kb in ["oryzabase_testold.ttl", "oryzabase_testold.ttl"]:
                folder, fileName = getEntitiesPropertiesValue(
                    kb, attrib, "Datasets")
                print("Dataset CSV file name ", fileName)
                print("Dataset CSV file folder ", folder)
                listOfKBCSVFile.append(fileName)
                listOfKBCSVFileFolder.append(folder)

                fqModelfolder, fqModel = createFrequencyModel(
                    fileName, attrib, None, "row", "KB", "tfidf", folder)
                print("Model file name ", fqModel)
                print("Model file folder ", folder)
                listOfFqModel.append(fqModel)
                listOfFqModelFolder.append(fqModelfolder)

                filePath = os.path.join(os.path.join(
                    DATA_FOLDER, folder), fileName)
                wordsImpFileFolder, wordsImpFile = wordsImportance(
                    fqModel, "tfidf", fileName, attrib, None, "row", fqModelfolder, folder)
                #print("Word important file name ", wordsImpFile)
                #print("Word important file folder ", wordsImpFileFolder)
                listOfWordsImportance.append(wordsImpFile)
                listOfWordsImportanceFolder.append(wordsImpFileFolder)
        for attrib in listOfAttributs:
            completSimilarityFolder, completeSimilarityFile = completeSimilarityOfDatasets(
                "myModel.bin", "tfidf", listOfKBCSVFile[0], listOfWordsImportance[0],
                listOfKBCSVFile[1], listOfWordsImportance[1], attrib, "Models",
                listOfKBCSVFileFolder[0], listOfWordsImportanceFolder[0])
Example #5
def test_getAttributeVectorGround():
    getEntitiesPropertiesValue(
        "oryzabase_ground.ttl", None, "Grounds", "Outputs")
    print("End ground")
Example #6
from src.commons import (
    readCompressDataFile, createtfIdfModel, wordsImportance, searchEntityInText,
    createListOfText, generateTermDocumentMatrix, createCooccurrenceMatrix,
    createTfIdfAndBowModel, completeKmeans, stoplist, readDataFile)
from src.kgmanagement import getEntitiesPropertiesValue
from src.embedding import trainingModel, cleaningDataset, createStopListFromFile, completeSimilarityOfDatasets


if __name__== "__main__":
   fileName = "oryzabase_test.ttl"
   getEntitiesPropertiesValue(fileName)

   fileName = "OryzabaseGeneList_test.ttl"  
   getEntitiesPropertiesValue(fileName)
   completeSimilarityOfDatasets("myModel.bin", "OryzabaseGeneList_test.csv", "oryzabase_test.csv", "Outputs")
   #create extracted values from predicates of all entities in the file
   #getEntitiesPropertiesValue("gramene_Oryza_sativa_japonica_genes.ttl")
   #trainingModel("gramene_Oryza_sativa_japonica_genes.csv")
   #createtfIdfModel("gramene_Oryza_sativa_japonica_genes.csv", "description", "Texts")
   #wordsImportance("gramene_Oryza_sativa_japonica_genes.csv", "description", "Texts")
   #readCompressDataFile("test.csv", "Texts")
   #searchEntityInText("gramene_Oryza_sativa_japonica_genes.csv", "label", "newdata.csv", "Abstract")
   #searchEntityInText("wordsLessImportance50pourcent.csv", "words", "newdata.csv", "Abstract")
  # stoplist = createStopListFromFile("wordsLessImportance50pourcent.csv", "words", "Texts")
   #cleanData = cleaningDataset(stoplist,"newdata.csv", "Abstract" )
   #trainingModel(stoplist, "newdata.csv", "Abstract")
   """
   print("##################################################")
   listOfText = createListOfText("gramene_Oryza_sativa_japonica_genes.csv", "description" )
   print(listOfText)

   listOfVectors = createCooccurrenceMatrix(stoplist, listOfText)
   
   for k in range(2,50):
Example #7
    def test_getAttributeVectorRDF(self):
        getEntitiesPropertiesValue(
            "DB_Lake.db2013.rdf", None, "Datasets", "Outputs")
        print("End RDF")
Example #8
import traceback


def test_evaluation(log):
    try:
        # Any exception raised by the call is caught and its traceback is
        # written to the provided log file.
        getEntitiesPropertiesValue()

    except Exception:
        traceback.print_exc(file=log)