Beispiel #1
0
    def __init__(self, tableName="document", copy=True):
        """
        Set up the schema description of the document table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        from contextionaryDatabase import Table
        from Context import Context

        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        # The context table object supplies the name the foreign key points at.
        contextTable = Context()

        self.defaultPrimaryKeys = ["document_id"]
        self.defaultUnique = ["document_path"]
        self.defaultForeignKeys = {
            "context_id": (contextTable.tableName, "context_id"),
        }
        # column name -> data type
        self.defaultColumns = {
            "document_id": "serial",
            "document_title": "varchar(255)",
            "context_id": "bigint",
            "document_content": "text",
            "document_path": "text",
        }

        self.getTriggerFunction = None
        self.Table = Table(self.tableName)
Beispiel #2
0
    def __init__(self, tableName="input_text_keywords", copy=False):
        """
        Set up the schema description of the input text keywords table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        from contextionaryDatabase import Table

        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        self.defaultPrimaryKeys = ["input_text_id", "context_id", "keyword_id"]
        self.defaultUnique = None
        self.defaultForeignKeys = None
        # column name -> data type
        self.defaultColumns = {
            "input_text_id": "bigint",
            "context_id": "bigint",
            "keyword_id": "bigint",
            "keyword_position": "bigint[]",
            "keyword_text": "text",
            "phrase_id": "bigint",
        }
        self.tableDependencies = [
            "input_text_word_position",
            "input_text_context_identifier",
            "input_text_phrase_count",
            "context_phrase",
            "phrase",
        ]

        self.Table = Table(self.tableName)
        # Called last, once every attribute above has been assigned.
        self.getTriggerFunction = self.triggerFunction()
Beispiel #3
0
    def __init__(self, tableName="frequency_distance", copy=False):
        """
        Set up the schema description of the frequency distance table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        from contextionaryDatabase import Table

        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        self.defaultPrimaryKeys = ["context_id", "phrase_id"]
        self.defaultUnique = None
        self.defaultForeignKeys = None
        # column name -> data type
        self.defaultColumns = {
            "context_id": "bigint",
            "phrase_id": "bigint",
            "phrase_relative_frequency": "decimal",
            "phrase_distance_to_context": "decimal",
            "phrase_difficulty": "int",
        }
        self.tableDependencies = [
            "context_phrase",
            "phrase_vector_space",
            "phrase_distance_to_context",
        ]

        self.Table = Table(self.tableName)
        # Called last, once every attribute above has been assigned.
        self.getTriggerFunction = self.triggerFunction()
    def __init__(self, tableName="related_phrase", copy=False):
        """
        Set up the schema description of the related phrase table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        from contextionaryDatabase import Table

        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        self.defaultPrimaryKeys = [
            "context_id",
            "context_phrase_id",
            "related_phrase_id",
        ]
        self.defaultUnique = None
        self.defaultForeignKeys = None
        # column name -> data type
        self.defaultColumns = {
            "context_id": "bigint",
            "context_phrase_id": "bigint",
            "related_phrase_id": "bigint",
            "phrase_bonding_index": "decimal",
        }
        self.tableDependencies = [
            "context_phrase",
            "phrase",
            "context_axis",
            "phrase_origin",
            "document",
        ]

        self.Table = Table(self.tableName)
        # Called last, once every attribute above has been assigned.
        self.getTriggerFunction = self.triggerFunction()
    def __init__(self, tableName="input_text", copy=False):
        """
        Set up the schema description of the input text table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        from contextionaryDatabase import Table

        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        self.defaultPrimaryKeys = ["input_text_id"]
        self.defaultUnique = ["input_text"]
        self.defaultForeignKeys = None
        # column name -> data type
        self.defaultColumns = {
            "input_text_id": "serial",
            "input_text": "text",
        }

        self.getTriggerFunction = None
        self.Table = Table(self.tableName)
    def __init__(self, tableName="context", copy=True):
        """
        Set up the schema description of the context table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        # Primary key columns.
        self.defaultPrimaryKeys = ["context_id"]

        # Natural key columns (the primary key is a surrogate).
        self.defaultUnique = ["context_path"]

        # Foreign keys: column name -> (referenced table, referenced column).
        # parent_id points back at this very table.
        self.defaultForeignKeys = {
            "parent_id": (self.tableName, "context_id"),
        }

        # Columns: column name -> data type string.
        self.defaultColumns = {
            "context_id": "serial",
            "context_name": "varchar(255)",
            "parent_id": "bigint NULL",
            "context_children_id": "bigint[] NULL",
            "context_picture": "varchar(255)",
            "directory_level": "bigint",
            "context_path": "text",
        }

        # No trigger function is attached to this table.
        self.getTriggerFunction = None

        # Table object bound to the resolved table name.
        self.Table = Table(self.tableName)
    def __init__(self, tableName="context_axis", copy=False):
        """
        Set up the schema description of the context axis table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        self.defaultPrimaryKeys = ["context_id", "independent_context_id"]
        self.defaultUnique = None
        self.defaultForeignKeys = None
        # column name -> data type
        self.defaultColumns = {
            "context_id": "bigint",
            "independent_context_id": "bigint",
            "axis_coordinate": "int",
        }
        self.tableDependencies = ["context"]

        self.Table = Table(self.tableName)
        # Called last, once every attribute above has been assigned.
        self.getTriggerFunction = self.triggerFunction()
Beispiel #8
0
    def __init__(self, tableName="phrase_spelling_similarity", copy=False):
        """
        Set up the schema description of the phrase spelling similarity table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        from contextionaryDatabase import Table

        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        self.defaultPrimaryKeys = ["phrase_id", "similar_spelling_phrase_id"]
        self.defaultUnique = None
        self.defaultForeignKeys = None
        # column name -> data type
        self.defaultColumns = {
            "phrase_id": "bigint",
            "similar_spelling_phrase_id": "bigint",
            "similarity_index": "bigint",
        }
        self.tableDependencies = ["context_phrase", "phrase"]

        self.Table = Table(self.tableName)
        # Called last, once every attribute above has been assigned.
        self.getTriggerFunction = self.triggerFunction()
Beispiel #9
0
    def __init__(self, tableName="phrase", copy=False):
        """
        Set up the schema description of the phrase table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        from contextionaryDatabase import Table

        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        self.defaultPrimaryKeys = ["phrase_id"]
        self.defaultUnique = ["phrase_text"]
        self.defaultForeignKeys = None
        # column name -> data type
        self.defaultColumns = {
            "phrase_id": "serial",
            "phrase_text": "varchar(255)",
            "phrase_length": "smallint",
            "red_flag": "smallint",
        }
        self.tableDependencies = ["phrase_origin"]

        self.Table = Table(self.tableName)
        # Called last, once every attribute above has been assigned.
        self.getTriggerFunction = self.triggerFunction()
Beispiel #10
0
    def __init__(self, tableName="input_text_word_position", copy=False):
        """
        Set up the schema description of the input text word position table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        from contextionaryDatabase import Table

        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        self.defaultPrimaryKeys = ["input_text_id", "input_text_phrase_id"]
        self.defaultUnique = None
        self.defaultForeignKeys = None
        # column name -> data type
        self.defaultColumns = {
            "input_text_id": "bigint",
            "input_text_phrase_id": "bigint",
            "phrase_text": "text",
            "phrase_position": "bigint",
            "phrase_length": "bigint",
            "phrase_components": "bigint[]",
        }

        self.Table = Table(self.tableName)
        self.getTriggerFunction = None
    def __init__(self, tableName="shared_word", copy=False):
        """
        Set up the schema description of the shared word table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        from contextionaryDatabase import Table

        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        self.defaultPrimaryKeys = ["long_phrase_id", "sibling_id", "shared_word"]
        self.defaultUnique = None
        self.defaultForeignKeys = None
        # column name -> data type
        self.defaultColumns = {
            "long_phrase_id": "bigint",
            "sibling_id": "bigint",
            "shared_word": "varchar(255)",
            "shared_word_position_in_long_phrase": "bigint[]",
            "shared_word_position_in_sibling": "bigint[]",
        }
        self.tableDependencies = ["context_phrase", "phrase"]

        self.Table = Table(self.tableName)
        # Called last, once every attribute above has been assigned.
        self.getTriggerFunction = self.triggerFunction()
    def __init__(self, tableName="phrase_origin", copy=True):
        """
        Set up the schema description of the phrase origin table.

        tableName -- base table name
        copy      -- when equal to True, '_temp' is appended to the name
        """
        from contextionaryDatabase import Table
        from Document import Document

        self.copy = copy
        # Only a value equal to True selects the '_temp' variant.
        self.tableName = tableName + '_temp' if self.copy == True else tableName

        # The document table object supplies the name the foreign key points at.
        documentTable = Document()

        self.defaultPrimaryKeys = ["document_id", "phrase_text"]
        self.defaultUnique = None
        self.defaultForeignKeys = {
            "document_id": (documentTable.tableName, "document_id"),
        }
        # column name -> data type
        self.defaultColumns = {
            "document_id": "bigint",
            "phrase_text": "text",
            "phrase_count_per_document": "integer",
        }

        self.Table = Table(self.tableName)
        self.getTriggerFunction = None
Beispiel #13
0
class Document(object):
    """
    Schema description of the document table plus insert/delete helpers.
    """

    def __init__(self, tableName="document", copy=True):
        """
        Set up the schema description of the document table.

        tableName -- base table name
        copy      -- when true, '_temp' is appended to the table name
        """
        from contextionaryDatabase import Table
        from Context import Context

        self.tableName = tableName
        self.copy = copy
        if self.copy:
            self.tableName = tableName + '_temp'

        # The context table object supplies the name the foreign key points at.
        context = Context()

        # column name -> data type
        self.defaultColumns = {
            "document_id": "serial",
            "document_title": "varchar(255)",
            "context_id": "bigint",
            "document_content": "text",
            "document_path": "text"
        }
        self.defaultPrimaryKeys = ["document_id"]
        self.defaultUnique = ["document_path"]
        # column name -> (referenced table, referenced column)
        self.defaultForeignKeys = {
            "context_id": (context.tableName, "context_id")
        }
        self.getTriggerFunction = None
        self.Table = Table(self.tableName)

    @staticmethod
    def _splitDocumentPath(documentPath):
        """
        Split a path at its last os.sep into (directory path, file name).

        A path without any separator yields an empty directory path.
        """
        import os

        # os.sep is "/" on POSIX systems and "\\" on Windows, so this also
        # covers platforms whose platform string does not contain "Linux"
        # but which still use "/" (e.g. macOS).
        contextPath, _, documentFilename = documentPath.rpartition(os.sep)
        return contextPath, documentFilename

    def addRecord(self, documentPath, connectDB):
        """
        Adds record to document table and phrase origin table

        documentPath -- path of the document file; its directory part is
                        looked up as context_path in the context table
        connectDB    -- object whose ``connection`` attribute hands out
                        database cursors

        Raises IndexError when the context lookup returns no row.
        """
        print("add document start...", documentPath)
        context = Context()
        phraseOrigin = PhraseOrigin()

        # Columns to insert: everything except the (serial) primary key.
        # Their order must match the value list passed to execute() below.
        nonPrimeAttributes = [
            column for column in self.defaultColumns
            if column not in self.defaultPrimaryKeys
        ]

        contextPath, documentFilename = self._splitDocumentPath(documentPath)

        # Drops the last four characters of the file name
        # (presumably a ".txt" extension -- TODO confirm).
        documentTitle = documentFilename[:len(documentFilename) - 4]

        contextID = context.Table.selectColumn("context_id",
                                               {"context_path": [contextPath]})
        contextID = contextID[0]

        # The context manager closes the file even if reading fails.
        with open(documentPath, "r", encoding="UTF-8-sig") as file:
            documentContent = file.read()

        cur = connectDB.connection.cursor()
        try:
            strSQL1 = sql.SQL("""INSERT INTO {} ({}) VALUES ({})""")
            strSQL2 = sql.SQL(', ').join(
                map(sql.Identifier, nonPrimeAttributes))
            strSQL3 = sql.SQL(', ').join(sql.Placeholder() *
                                         len(nonPrimeAttributes))
            cur.execute(
                strSQL1.format(sql.Identifier(self.tableName), strSQL2,
                               strSQL3),
                ([documentTitle, contextID, documentContent, documentPath]))
        finally:
            cur.close()

        # Read back the generated id of the row that was just inserted.
        documentID = self.Table.selectColumn("document_id",
                                             {"document_path": [documentPath]})

        if phraseOrigin.Table.exists():
            textProcessor = TextProcessor(documentContent,
                                          config.PARSE['phraseMaxLength'])
            for phraseDict in textProcessor.phraseCount.values():
                for key, val in phraseDict.items():
                    phraseOrigin.addRecord(documentID[0], key, val, connectDB)

        print("add document end...", documentPath)

    def deleteRecord(self, documentPath, connectDB):
        """
        Deletes record from document table

        documentPath -- document_path value of the row(s) to delete
        connectDB    -- object whose ``connection`` attribute hands out
                        database cursors
        """
        cur = connectDB.connection.cursor()
        try:
            strSQL = sql.SQL("""DELETE FROM {} WHERE document_path = %s""")
            cur.execute(strSQL.format(sql.Identifier(self.tableName)),
                        ([documentPath]))
        finally:
            cur.close()
class InputText(object):
    """
    Schema description of the input text table plus insert/delete helpers.
    """

    def __init__(self, tableName="input_text", copy=False):
        """
        Set up the schema description of the input text table.

        tableName -- base table name
        copy      -- when true, '_temp' is appended to the table name
        """
        from contextionaryDatabase import Table

        self.tableName = tableName
        self.copy = copy
        if self.copy:
            self.tableName = tableName + '_temp'

        # column name -> data type
        self.defaultColumns = {"input_text_id": "serial",
                               "input_text": "text"}
        self.defaultPrimaryKeys = ["input_text_id"]
        self.defaultUnique = ["input_text"]
        self.defaultForeignKeys = None
        self.getTriggerFunction = None

        self.Table = Table(self.tableName)

    @staticmethod
    def _buildPhrasePositions(words, maxPL):
        """
        Enumerate every n-gram of ``words`` whose length is 1..maxPL.

        Ids are handed out consecutively, first to all n-grams of length 1
        (by 1-based start position), then length 2, and so on.

        Returns a list of tuples, in id order:
            (phraseID, phraseText, phrasePosition, phraseLength,
             phraseComponents)
        where phraseComponents lists the ids of every n-gram contained in
        the phrase, the phrase itself included.
        """
        wordCount = len(words)
        # A phrase cannot be longer than the text; without this clamp short
        # texts would produce ids for n-grams that do not exist.
        longest = min(maxPL, wordCount)

        phraseIDs = {}
        nextID = 0
        for length in range(1, longest + 1):
            for position in range(1, wordCount - length + 2):
                nextID += 1
                phraseIDs[(position, length)] = nextID

        records = []
        for (position, length), phraseID in phraseIDs.items():
            components = []
            for subLength in range(1, length + 1):
                # Ids of equal-length n-grams are consecutive by position, so
                # the contained n-grams of this length form one id range.
                first = phraseIDs[(position, subLength)]
                components.extend(range(first, first + length - subLength + 1))
            phraseText = " ".join(words[position - 1:position + length - 1])
            records.append(
                (phraseID, phraseText, position, length, components))
        return records

    def addRecord(self, text, connectDB):
        """
        Adds record to "input text" and "input text phrase count" and "input text word position"
        This process is analogous to the addRecord module in the Document class

        text      -- the input text to store
        connectDB -- object whose ``connection`` attribute hands out
                     database cursors
        """
        from InputTextPhraseCount import InputTextPhraseCount
        from InputTextWordPosition import InputTextWordPosition
        from psycopg2 import sql
        # Imported up front because both branches below use it; importing it
        # inside the first branch only left the name unbound for the second.
        from Document import TextProcessor

        # Columns to insert: everything except the (serial) primary key.
        nonPrimeAttributes = [
            column for column in self.defaultColumns
            if column not in self.defaultPrimaryKeys
        ]

        cur = connectDB.connection.cursor()
        try:
            strSQL1 = sql.SQL("""INSERT INTO {} ({}) VALUES ({})""")
            strSQL2 = sql.SQL(', ').join(map(sql.Identifier, nonPrimeAttributes))
            strSQL3 = sql.SQL(', ').join(sql.Placeholder() * len(nonPrimeAttributes))
            cur.execute(strSQL1.format(sql.Identifier(self.tableName), strSQL2, strSQL3), ([text]))
        finally:
            cur.close()

        # Read back the generated id of the row that was just inserted.
        inputTextID = self.Table.selectColumn("input_text_id", {"input_text": [text]})

        inputTextPhraseCount = InputTextPhraseCount()
        if inputTextPhraseCount.Table.exists():
            textProcessor = TextProcessor(text, config.PARSE['phraseMaxLength'])
            for phraseDict in textProcessor.phraseCount.values():
                for key, val in phraseDict.items():
                    inputTextPhraseCount.addRecord(inputTextID[0], key, val, connectDB)

        inputTextWordPosition = InputTextWordPosition()
        if inputTextWordPosition.Table.exists():
            maxPL = config.PARSE['phraseMaxLength']
            words = TextProcessor(text, maxPL).getWordOrderedList()
            for record in self._buildPhrasePositions(words, maxPL):
                (inputTextPhraseID, phraseText, phrasePosition,
                 phraseLength, phraseComponents) = record
                inputTextWordPosition.addRecord(
                    inputTextID[0], inputTextPhraseID, phraseText,
                    phrasePosition, phraseLength, phraseComponents, connectDB)

    def deleteRecord(self, text, connectDB):
        """
        Deletes record from input text

        text      -- input_text value of the row(s) to delete
        connectDB -- object whose ``connection`` attribute hands out
                     database cursors
        """
        from psycopg2 import sql

        cur = connectDB.connection.cursor()
        try:
            strSQL = sql.SQL("""DELETE FROM {} WHERE input_text = %s""")
            cur.execute(strSQL.format(sql.Identifier(self.tableName)), ([text]))
        finally:
            cur.close()
class Context(object):
    """
    Schema description of the context table plus record maintenance helpers.
    """

    def __init__(self, tableName="context", copy=True):
        """
        Set up the schema description of the context table.

        tableName -- base table name
        copy      -- when true, '_temp' is appended to the table name
        """
        self.tableName = tableName
        self.copy = copy
        if self.copy:
            self.tableName = tableName + '_temp'

        # Columns: column name -> data type string.
        self.defaultColumns = {"context_id": "serial",
                               "context_name": "varchar(255)",
                               "parent_id": "bigint NULL",
                               "context_children_id": "bigint[] NULL",
                               "context_picture": "varchar(255)",
                               "directory_level": "bigint",
                               "context_path": "text"}

        # Primary key columns.
        self.defaultPrimaryKeys = ["context_id"]

        # Natural key columns (the primary key is a surrogate).
        self.defaultUnique = ["context_path"]

        # Foreign keys: column name -> (referenced table, referenced column).
        # parent_id points back at this very table.
        self.defaultForeignKeys = {"parent_id": (self.tableName, "context_id")}

        # No trigger function is attached to this table.
        self.getTriggerFunction = None

        # Table object bound to the resolved table name.
        self.Table = Table(self.tableName)

    @staticmethod
    def _splitContextPath(contextPath):
        """
        Break a context path into the pieces stored in the context table.

        Returns (contextName, directoryLevel, parent, parentPath):
        the last path component, the number of separators in the path, the
        second-to-last component and the path without its last component.

        Raises IndexError when the path has fewer than two components.
        """
        import os

        # os.sep is "/" on POSIX systems and "\\" on Windows, so this also
        # covers platforms whose platform string does not contain "Linux"
        # but which still use "/" (e.g. macOS).
        parts = contextPath.split(os.sep)
        return parts[-1], len(parts) - 1, parts[-2], os.sep.join(parts[:-1])

    def addRecord(self, contextPath, connectDB):
        """
        Adds record to context table

        contextPath -- directory path of the context
        connectDB   -- object whose ``connection`` attribute hands out
                       database cursors
        """
        # Columns to insert: everything except the (serial) primary key.
        # Their order must match the value list passed to execute() below.
        nonPrimeAttributes = [
            column for column in self.defaultColumns
            if column not in self.defaultPrimaryKeys
        ]

        contextName, directoryLevel, parent, parentPath = \
            self._splitContextPath(contextPath)

        # Both are filled in by the follow-up updates at the end.
        contextChildrenID = None
        contextPicture = None

        root = config.PARSE['rootDirectory']
        if parent == root:
            # Contexts directly below the root directory have no parent row.
            parentID = None
        else:
            cur = connectDB.connection.cursor()
            try:
                strSQL = sql.SQL("""SELECT context_id FROM {} WHERE context_path = %s""")
                cur.execute(strSQL.format(sql.Identifier(self.tableName)), ([parentPath]))
                parentID = cur.fetchone()
                parentID = parentID[0]
            finally:
                cur.close()

        cur = connectDB.connection.cursor()
        try:
            strSQL1 = sql.SQL("""INSERT INTO {} ({}) VALUES ({})""")
            strSQL2 = sql.SQL(', ').join(map(sql.Identifier, nonPrimeAttributes))
            strSQL3 = sql.SQL(', ').join(sql.Placeholder() * len(nonPrimeAttributes))
            cur.execute(strSQL1.format(sql.Identifier(self.tableName), strSQL2, strSQL3), ([contextName, parentID, contextChildrenID, contextPicture, directoryLevel, contextPath]))
        finally:
            cur.close()

        # update context children ID in --context-- table
        self.generateContextChildrenID(contextPath, connectDB)

        # update context picture in --context-- table
        contextPicture = self.generateContextPicture(contextPath)
        self.updateContextProperty(contextPath, {"context_picture": contextPicture}, connectDB)

    def deleteRecord(self, contextPath, connectDB):
        """
        Deletes record from context table

        contextPath -- context_path value of the row(s) to delete
        connectDB   -- object whose ``connection`` attribute hands out
                       database cursors
        """
        cur = connectDB.connection.cursor()
        try:
            strSQL = sql.SQL("""DELETE FROM {} WHERE context_path = %s""")
            cur.execute(strSQL.format(sql.Identifier(self.tableName)), ([contextPath]))
        finally:
            cur.close()

    def updateContextProperty(self, contextPath, setClause, connectDB):
        """
        Updates record from context table

        contextPath -- context_path value selecting the row to update
        setClause   -- dict of column name -> new value; one UPDATE is issued
                       per entry
        connectDB   -- object whose ``connection`` attribute hands out
                       database cursors
        """
        for key, val in setClause.items():

            if key == "parent_id":
                # NOTE(review): this branch retargets the update to the
                # "context_id" column and replaces contextPath with the raw
                # selectColumn() result for the rest of the loop; elsewhere in
                # this class selectColumn() results are indexed with [0].
                # Behavior kept as is -- confirm the intent.
                cipid = self.Table.selectColumn("parent_id", {"context_path": [contextPath]})
                key = "context_id"
                contextPath = self.Table.selectColumn("context_path", {key: [cipid[0]]})

            cur = connectDB.connection.cursor()
            try:
                strSQL = sql.SQL("""UPDATE {} SET {} = %s WHERE context_path = %s""")
                cur.execute(strSQL.format(sql.Identifier(self.tableName), sql.Identifier(key)), (val, contextPath))
            finally:
                cur.close()

    def generateContextChildrenID(self, contextPath, connectDB):
        """
        Registers the context at ``contextPath`` in its parent's
        context_children_id column; does nothing when the context has no
        parent id.
        """
        contextID = self.Table.selectColumn("context_id", {"context_path": [contextPath]})
        parentID = self.Table.selectColumn("parent_id", {"context_path": [contextPath]})

        if not parentID[0]:
            return

        contextChildrenID = self.Table.selectColumn("context_children_id", {"context_id": [parentID[0]]})

        if contextChildrenID[0] is None:
            # First child registered for this parent.
            childrenIDs = [contextID[0]]
        else:
            childrenIDs = contextChildrenID[0]
            childrenIDs.extend(contextID)

        parentContextPath = self.Table.selectColumn("context_path", {"context_id": [parentID[0]]})
        self.updateContextProperty(parentContextPath[0], {"context_children_id": childrenIDs}, connectDB)

    def generateContextPicture(self, contextPath):
        """
        Generates context picture for context table

        Returns the string "<context_id>-<context_name>" of the row whose
        context_path equals ``contextPath``.
        """
        contextID = self.Table.selectColumn("context_id", {"context_path": [contextPath]})
        contextName = self.Table.selectColumn("context_name", {"context_path": [contextPath]})
        return "-".join([str(contextID[0]), contextName[0]])