コード例 #1
0
def ShowClassSchema(databaseName, volumeName, className):
    oosqlSystem = PyOOSQL.OOSQL_System()
    databaseID = oosqlSystem.MountDB(databaseName)
    volumeID = oosqlSystem.GetVolumeID(databaseID, volumeName)

    oosqlSystem.TransBegin(PyOOSQL.X_BROWSE_BROWSE)

    description = oosqlSystem.GetTableDescription(volumeID, className)

    print "ClassName :", description[0]
    print "ClassId   :", description[1]
    print "Attribute Infomation :"
    for attrInfo in description[2]:
        print "%4d %35s  %-10s %s" % (attrInfo[1], attrInfo[0],
                                      TypeName(attrInfo[2]),
                                      ComplexTypeName(attrInfo[3]))
    print "Index Information :"
    i = 0
    for indexInfo in description[3]:
        print "%4s %35s  %-12s %10s" % (i, indexInfo[0], indexInfo[1],
                                        indexInfo[2])
        for attrInfo in indexInfo[4]:
            print "%40s  %s" % (description[2][attrInfo[0]][0], attrInfo[1])
        i = i + 1

    oosqlSystem.TransCommit()
    oosqlSystem.DismountDB(databaseID)
コード例 #2
0
def GetTableInfo(argv):
    (databaseName, volumeName, tableNameList) = ParseArgument(argv)

    # mount database
    oosqlSystem = PyOOSQL.OOSQL_System()
    databaseID = oosqlSystem.MountDB(databaseName)
    volumeID = oosqlSystem.GetVolumeID(databaseID, volumeName)

    # begin transaction
    oosqlSystem.TransBegin(PyOOSQL.X_BROWSE_BROWSE)

    # get all tables in database
    if len(tableNameList) == 0:
        query = oosqlSystem.CreateQuery(volumeID)
        query.ExecDirect("select className from lomSysClasses")
        results = query.FetchAll()
        # remove system catalog table from list
        tableNameList = RemoveSystemCatalogTable(results)
        query = None
        # display the number of tables
        print "=" * 79
        print "SUMMARY:"
        print "The Number of Tables in Database '%s', Volume '%s': %d" % (
            databaseName, volumeName, len(tableNameList))
        print "=" * 79
        print

    # display the description of tables
    for tableName in tableNameList:
        (tableName, tableId, attributeInfos,
         indexInfos) = oosqlSystem.GetTableDescription(volumeID, tableName)
        print
        print "=" * 79
        print "I. Table Name: %s" % (tableName)
        print "=" * 79
        print "II. Attribute Information"
        DisplayAttributeInfo(attributeInfos)
        print "=" * 79
        print "III. B-Tree Index Information"
        DisplayIndexInfo(indexInfos, attributeInfos)
        print "=" * 79
        print

    # commit transaction
    oosqlSystem.TransCommit()

    # dismount database
    oosqlSystem.DismountDB(databaseID)
コード例 #3
0
def ShowAllClasses(databaseName, volumeName):
    oosqlSystem = PyOOSQL.OOSQL_System()
    databaseID = oosqlSystem.MountDB(databaseName)
    volumeID = oosqlSystem.GetVolumeID(databaseID, volumeName)

    oosqlSystem.TransBegin(PyOOSQL.X_BROWSE_BROWSE)

    classNames = oosqlSystem.GetAllClassNames(volumeID)

    print "Total %d classes are defined" % len(classNames)
    i = 0
    for className in classNames:
        print "%4d %30s" % (i, className)
        i = i + 1

    oosqlSystem.TransCommit()
    oosqlSystem.DismountDB(databaseID)
コード例 #4
0
#/*        (1999)).                                                            */
#/*    [4] Whang, K., Lee, J., Kim, M., Lee, M., Lee, K., Han, W., and Kim,    */
#/*        J., "Tightly-Coupled Spatial Database Features in the               */
#/*        Odysseus/OpenGIS DBMS for High-Performance," GeoInformatica,        */
#/*        Vol. 14, No. 4, pp. 425-446, Oct. 2010.                             */
#/*    [5] Whang, K., Lee, J., Kim, M., Lee, M., and Lee, K., "Odysseus: a     */
#/*        High-Performance ORDBMS Tightly-Coupled with Spatial Database       */
#/*        Features," In Proc. 23rd IEEE Int'l Conf. on Data Engineering       */
#/*        (ICDE), pp. 1493-1494 (demo), Istanbul, Turkey, Apr. 16-20, 2007.   */
#/*                                                                            */
#/******************************************************************************/

import string
import PyOOSQL

system = PyOOSQL.OOSQL_System()

print '-' * 79
print system.GetVersionString()
print '-' * 79
cfgstring = system.GetCompilationParamString()
cfgs = string.split(cfgstring, '\n')
for cfg in cfgs:
    splitted = string.split(cfg, ':')

    if len(splitted) == 2:
        name = string.strip(splitted[0])
        value = string.strip(splitted[1])
        print "%50s %-20s" % (name, value)
print '-' * 79
コード例 #5
0
def LoaderExtract(argv):
    (databaseName, volumeName, temporaryDatabaseName, temporaryVolumeName,
     dataFileName, pagerankFileName, pagerankMode) = ParseArgument(argv)

    # use external sort utility
    useExternalSortUtility = 1

    # mount database
    print "-" * 79
    print "Mount database '%s'" % (databaseName)
    oosqlSystem = PyOOSQL.OOSQL_System()
    databaseID = oosqlSystem.MountDB(databaseName)
    volumeID = oosqlSystem.GetVolumeID(databaseID, volumeName)
    temporaryVolumeID = oosqlSystem.MountVolumeByVolumeName(
        temporaryDatabaseName, temporaryVolumeName)

    # begin transaction
    print "-" * 79
    print "Transaction begin"
    oosqlSystem.TransBegin(PyOOSQL.X_BROWSE_BROWSE)

    # get className and attribute list in the datafile
    f = open(dataFileName, "r")
    classLine = f.readline()
    f.close()

    # replace '(' and ')' into <space>
    classLine = string.join(string.split(classLine, '('), ' ')
    classLine = string.join(string.split(classLine, ')'), ' ')

    # retrives className and attributesInDatafile
    classLineSplitted = string.split(classLine)
    className = classLineSplitted[1]
    attributesInDatafile = classLineSplitted[2:]

    # get text attributes in the database schema
    (className, classId, attributeInfos,
     indexInfos) = oosqlSystem.GetTableDescription(volumeID, className)
    attributes = []
    for attributeInfo in attributeInfos:
        if attributeInfo[2] == PyOOSQL.OOSQL_TYPE_TEXT:
            if attributeInfo[0] in attributesInDatafile:
                attributes.append(attributeInfo[0])

    # prepare temporary path
    env_ODYS_TEMP_PATH = os.environ["ODYS_TEMP_PATH"]

    if sys.platform == "win32":
        dirSeparator = "\\"
    else:
        dirSeparator = "/"

    # extract keyword
    nAttributes = len(attributes)
    for i in range(nAttributes):
        attrName = attributes[i]
        if i < nAttributes - 1:
            nextAttrName = attributes[i + 1]
            nextPostingFileName = "%s_TEXT_%s_%s_Posting" % (
                dataFileName, className, nextAttrName)
            nextPostingFileName = string.join(
                string.split(nextPostingFileName, dirSeparator), '_')
            nextSortedPostingFileName = "%s_TEXT_%s_%s_SortedPosting" % (
                dataFileName, className, nextAttrName)
            nextSortedPostingFileName = string.join(
                string.split(nextSortedPostingFileName, dirSeparator), '_')
            if os.access("%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator, nextPostingFileName), 0) or \
               os.access("%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator, nextSortedPostingFileName), 0):
                print "Skip extracting keyword from class '%s', attribute '%s'" % (
                    className, attrName)
                continue

        postingFileName = "%s_TEXT_%s_%s_Posting" % (dataFileName, className,
                                                     attrName)
        postingFileName = string.join(
            string.split(postingFileName, dirSeparator), '_')
        sortedPostingFileName = "%s_TEXT_%s_%s_SortedPosting" % (
            dataFileName, className, attrName)
        sortedPostingFileName = string.join(
            string.split(sortedPostingFileName, dirSeparator), '_')
        if os.access(
                "%s%s%s" %
            (env_ODYS_TEMP_PATH, dirSeparator, sortedPostingFileName), 0):
            print "Skip extracting keyword from class '%s', attribute '%s'" % (
                className, attrName)
        else:
            oosqlSystem.Tool_ExtractKeyword(
                volumeID,
                className,
                attrName,
                dataFileName,
                postingFileName,
                0,  # from start
                -1,  # to end
                1)  # alwaysUsePreviousPostingFile = false

            # PageRank Option
            if (pagerankMode == 1):
                pagerankIDFileName = env_ODYS_TEMP_PATH + dirSeparator + pagerankFileName
                inputFileName = env_ODYS_TEMP_PATH + dirSeparator + postingFileName
                outputFileName = inputFileName + "_PageRank"
                Convert_LogicalIDtoPageRankID(pagerankIDFileName,
                                              inputFileName, outputFileName)
                os.rename(outputFileName, inputFileName)

            if useExternalSortUtility:
                print "Do external sorting for %s" % (postingFileName)
                oosqlSystem.Tool_SortPosting(postingFileName,
                                             sortedPostingFileName)
                os.unlink("%s%s%s" %
                          (env_ODYS_TEMP_PATH, dirSeparator, postingFileName))

    # commit transaction
    print "-" * 79
    print "Transaction commit"
    oosqlSystem.TransCommit()

    # dismount database
    print "-" * 79
    print "Dismount database"
    oosqlSystem.Dismount(temporaryVolumeID)
    oosqlSystem.DismountDB(databaseID)
コード例 #6
0
            # PageRank Option
            if (pagerankMode == 1):
                pagerankIDFileName = env_ODYS_TEMP_PATH + dirSeparator + pagerankFileName
                inputFileName = env_ODYS_TEMP_PATH + dirSeparator + postingFileName
                outputFileName = inputFileName + "_PageRank"
                Convert_LogicalIDtoPageRankID(pagerankIDFileName,
                                              inputFileName, outputFileName)
                os.rename(outputFileName, inputFileName)

            if useExternalSortUtility:
                print "Do external sorting for %s" % (postingFileName)
                oosqlSystem.Tool_SortPosting(postingFileName,
                                             sortedPostingFileName)
                os.unlink("%s%s%s" %
                          (env_ODYS_TEMP_PATH, dirSeparator, postingFileName))

    # commit transaction
    print "-" * 79
    print "Transaction commit"
    oosqlSystem.TransCommit()

    # dismount database
    print "-" * 79
    print "Dismount database"
    oosqlSystem.Dismount(temporaryVolumeID)
    oosqlSystem.DismountDB(databaseID)


# Hand the full command line (arguments joined by single spaces) to
# util_set_restart_command_after_segfault -- judging by its name, the command
# to rerun after a segmentation fault; confirm against PyOOSQL -- and then run
# the extraction with the same arguments.
PyOOSQL.util_set_restart_command_after_segfault(" ".join(sys.argv))
LoaderExtract(sys.argv)
コード例 #7
0
def LoaderMergeBuild(argv):
    (databaseName, volumeName, temporaryDatabaseName, temporaryVolumeName,
     dataFileName, pagerankFileName, pagerankMode) = ParseArgument(argv)

    # show previous loading history
    ShowLoadingHistory(databaseName, volumeName)

    line = raw_input("Are you sure ([y]es / [n]o) ? ")
    if string.lower(line[0]) == 'y':
        print "-" * 79
        print "Now, Start Loading..."
    elif string.lower(line[0]) == 'n':
        print "-" * 79
        print "Restart this command after confirmation."
        sys.exit(1)

    # mount database
    print "-" * 79
    print "Mount database '%s'" % (databaseName)
    oosqlSystem = PyOOSQL.OOSQL_System()
    databaseID = oosqlSystem.MountDB(databaseName)
    volumeID = oosqlSystem.GetVolumeID(databaseID, volumeName)
    temporaryVolumeID = oosqlSystem.MountVolumeByVolumeName(
        temporaryDatabaseName, temporaryVolumeName)

    # set bulk flush mode
    oosqlSystem.SetCfgParam("USE_BULKFLUSH", "TRUE")

    # begin transaction
    print "-" * 79
    print "Transaction begin"
    oosqlSystem.TransBegin(PyOOSQL.X_RR_RR)

    # get className and attribute list in the datafile
    f = open(dataFileName, "r")
    classLine = f.readline()
    f.close()
    # replace '(' and ')' into <space>
    classLine = string.join(string.split(classLine, '('), ' ')
    classLine = string.join(string.split(classLine, ')'), ' ')
    # retrives className and attributesInDatafile
    classLineSplitted = string.split(classLine)
    className = classLineSplitted[1]
    attributesInDatafile = classLineSplitted[2:]

    # get text attributes in the database schema
    (className, classId, attributeInfos,
     indexInfos) = oosqlSystem.GetTableDescription(volumeID, className)
    attributes = []
    for attributeInfo in attributeInfos:
        if attributeInfo[2] == PyOOSQL.OOSQL_TYPE_TEXT:
            if attributeInfo[0] in attributesInDatafile:
                attributes.append(attributeInfo[0])

    # prepare temporary path
    env_ODYS_TEMP_PATH = os.environ["ODYS_TEMP_PATH"]
    if sys.platform == "win32":
        dirSeparator = "\\"
    else:
        dirSeparator = "/"

    # loaddb
    print "-" * 79
    print "Load data from '%s'" % (dataFileName)
    isDeferredTextIndexMode = 1
    useBulkloading = 1
    useDescriptorUpdating = 1
    smallUpdateFlag = 0

    print "Start Loading"
    os.system("date")

    oosqlSystem.Tool_LoadDB(volumeID, temporaryVolumeID,
                            isDeferredTextIndexMode, smallUpdateFlag,
                            useBulkloading, useDescriptorUpdating,
                            dataFileName, pagerankFileName, pagerankMode)

    print "End Loading"
    os.system("date")

    print "Start Mapping"
    os.system("date")
    # mapping
    for attrName in attributes:
        print "-" * 79
        print "Map posting for class '%s', attribute '%s'" % (className,
                                                              attrName)
        sortedPostingExist = CheckIfSortedPostingExist(dataFileName, className,
                                                       attrName)

        if not sortedPostingExist:
            postingFileName = "%s_TEXT_%s_%s_Posting" % (dataFileName,
                                                         className, attrName)
            postingFileName = string.join(
                string.split(postingFileName, dirSeparator), '_')
            newPostingFileName = "%s_TEXT_%s_%s_Posting_Mapped" % (
                dataFileName, className, attrName)
            newPostingFileName = string.join(
                string.split(newPostingFileName, dirSeparator), '_')

            oosqlSystem.Tool_MapPosting(volumeID, className, attrName,
                                        [postingFileName], newPostingFileName,
                                        "TEXT_%s_OID" % (className), 0,
                                        pagerankFileName, pagerankMode)

            # rename mapped posting into posting
            srcName = "%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator,
                                  newPostingFileName)
            destName = srcName[:-7]  # remove trailing "_Mapped"
            try:
                os.unlink(destName)
            except OSError:
                pass
            os.rename(srcName, destName)
        else:
            postingFileName = "%s_TEXT_%s_%s_SortedPosting" % (
                dataFileName, className, attrName)
            postingFileName = string.join(
                string.split(postingFileName, dirSeparator), '_')
            newPostingFileName = "%s_TEXT_%s_%s_SortedPosting_Mapped" % (
                dataFileName, className, attrName)
            newPostingFileName = string.join(
                string.split(newPostingFileName, dirSeparator), '_')

            oosqlSystem.Tool_MapPosting(volumeID, className, attrName,
                                        [postingFileName], newPostingFileName,
                                        "TEXT_%s_OID" % (className), 0,
                                        pagerankFileName, pagerankMode)

            # rename mapped posting into posting
            srcName = "%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator,
                                  newPostingFileName)
            destName = srcName[:-7]  # remove trailing "_Mapped"
            try:
                os.unlink(destName)
            except OSError:
                pass
            os.rename(srcName, destName)
    print "End Mapping"
    os.system("date")

    # commit transaction
    print "-" * 79
    print "Transaction commit"
    oosqlSystem.TransCommit()

    # dismount database
    print "-" * 79
    print "Dismount database"
    oosqlSystem.Dismount(temporaryVolumeID)
    oosqlSystem.DismountDB(databaseID)

    # unlink oid file
    oidFileName = "TEXT_%s_OID" % (className)
    destName = "%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator, oidFileName)
    try:
        os.unlink(destName)
    except OSError:
        pass

    # log file information which has been successfully loaded
    WriteLoadingHistory(databaseName, volumeName, dataFileName)

    return
コード例 #8
0
def Loader(argv):
    (databaseName, volumeName, temporaryDatabaseName, temporaryVolumeName,
     dataFileName, pagerankFileName, pagerankMode) = ParseArgument(argv)

    # mount database
    print "-" * 79
    print "Mount database '%s'" % (databaseName)
    oosqlSystem = PyOOSQL.OOSQL_System()
    databaseID = oosqlSystem.MountDB(databaseName)
    volumeID = oosqlSystem.GetVolumeID(databaseID, volumeName)
    temporaryVolumeID = oosqlSystem.MountVolumeByVolumeName(
        temporaryDatabaseName, temporaryVolumeName)

    # set bulk flush mode
    oosqlSystem.SetCfgParam("USE_BULKFLUSH", "TRUE")

    # begin transaction
    print "-" * 79
    print "Transaction begin"
    oosqlSystem.TransBegin(PyOOSQL.X_RR_RR)

    # get className and attribute list in the datafile
    f = open(dataFileName, "r")
    classLine = f.readline()
    f.close()
    # replace '(' and ')' into <space>
    classLine = string.join(string.split(classLine, '('), ' ')
    classLine = string.join(string.split(classLine, ')'), ' ')
    # retrives className and attributesInDatafile
    classLineSplitted = string.split(classLine)
    className = classLineSplitted[1]
    attributesInDatafile = classLineSplitted[2:]

    # get text attributes in the database schema
    (className, classId, attributeInfos,
     indexInfos) = oosqlSystem.GetTableDescription(volumeID, className)
    attributes = []
    for attributeInfo in attributeInfos:
        if attributeInfo[2] == PyOOSQL.OOSQL_TYPE_TEXT:
            if attributeInfo[0] in attributesInDatafile:
                attributes.append(attributeInfo[0])

    # prepare temporary path
    env_ODYS_TEMP_PATH = os.environ["ODYS_TEMP_PATH"]

    if sys.platform == "win32":
        dirSeparator = "\\"
    else:
        dirSeparator = "/"

    # determine loadingMode : INITIAL_BULKLOADING, SMALL_APPEND_BULKLOADING, MEDIUM_APPEND_BULKLOADING, LARGE_APPEND_BULKLOADING
    numObjectsInDatabase = oosqlSystem.GetNumObjectsInClass(
        volumeID, className)
    numObjectsInFile = PyOOSQL.CountObjectsInLoadDbFile(dataFileName)
    print "Objects in the class '%s' is %d" % (className, numObjectsInDatabase)
    print "Objects in the file '%s' is %d" % (dataFileName, numObjectsInFile)
    if numObjectsInDatabase == 0:
        loadingMode = INITIAL_BULKLOADING
    elif numObjectsInFile < 2000:
        loadingMode = SMALL_APPEND_BULKLOADING
    elif numObjectsInDatabase * 0.1 > numObjectsInFile:
        loadingMode = MEDIUM_APPEND_BULKLOADING
    else:
        loadingMode = LARGE_APPEND_BULKLOADING
    print "Loading Mode :", loadingMode

    # loaddb
    print "-" * 79
    print "Load data from '%s'" % (dataFileName)
    isDeferredTextIndexMode = 1
    useBulkloading = 1
    useDescriptorUpdating = 1
    if loadingMode == SMALL_APPEND_BULKLOADING or loadingMode == MEDIUM_APPEND_BULKLOADING:
        smallUpdateFlag = 1
    else:
        smallUpdateFlag = 0
    oosqlSystem.Tool_LoadDB(volumeID, temporaryVolumeID,
                            isDeferredTextIndexMode, smallUpdateFlag,
                            useBulkloading, useDescriptorUpdating,
                            dataFileName, pagerankFileName, pagerankMode)

    # mapping
    env_ODYS_TEMP_PATH = os.environ["ODYS_TEMP_PATH"]
    if sys.platform == "win32":
        dirSeparator = "\\"
    else:
        dirSeparator = "/"

    for attrName in attributes:
        print "-" * 79
        print "Map posting for class '%s', attribute '%s'" % (className,
                                                              attrName)
        sortedPostingExist = CheckIfSortedPostingExist(dataFileName, className,
                                                       attrName)
        if not sortedPostingExist:
            postingFileName = "%s_TEXT_%s_%s_Posting" % (dataFileName,
                                                         className, attrName)
            postingFileName = string.join(
                string.split(postingFileName, dirSeparator), '_')

            oosqlSystem.Tool_MapPosting(
                volumeID, className, attrName, [postingFileName],
                "TEXT_%s_%s_Posting_Mapped" % (className, attrName),
                "TEXT_%s_OID" % (className), 0, pagerankFileName, pagerankMode)

            # rename mapped posting into posting
            srcName = "%s%sTEXT_%s_%s_Posting_Mapped" % (
                env_ODYS_TEMP_PATH, dirSeparator, className, attrName)
            destName = "%s%sTEXT_%s_%s_Posting" % (
                env_ODYS_TEMP_PATH, dirSeparator, className, attrName)
            try:
                os.unlink(destName)
            except OSError:
                pass
            os.rename(srcName, destName)
        else:
            postingFileName = "%s_TEXT_%s_%s_SortedPosting" % (
                dataFileName, className, attrName)
            postingFileName = string.join(
                string.split(postingFileName, dirSeparator), '_')

            oosqlSystem.Tool_MapPosting(
                volumeID, className, attrName, [postingFileName],
                "TEXT_%s_%s_SortedPosting_Mapped" % (className, attrName),
                "TEXT_%s_OID" % (className), 0, pagerankFileName, pagerankMode)

            # rename mapped posting into posting
            srcName = "%s%sTEXT_%s_%s_SortedPosting_Mapped" % (
                env_ODYS_TEMP_PATH, dirSeparator, className, attrName)
            destName = "%s%sTEXT_%s_%s_SortedPosting" % (
                env_ODYS_TEMP_PATH, dirSeparator, className, attrName)
            try:
                os.unlink(destName)
            except OSError:
                pass
            os.rename(srcName, destName)

    # build text index
    config = PyOOSQL.lom_Text_ConfigForInvertedIndexBuild()
    if loadingMode == SMALL_APPEND_BULKLOADING:
        config.isUsingBulkLoading = 0
        config.isUsingKeywordIndexBulkLoading = 0
        config.isUsingReverseKeywordIndexBulkLoading = 0
    elif loadingMode == MEDIUM_APPEND_BULKLOADING:
        config.isUsingBulkLoading = 1
        config.isUsingKeywordIndexBulkLoading = 0
        config.isUsingReverseKeywordIndexBulkLoading = 0
    elif loadingMode == INITIAL_BULKLOADING or loadingMode == LARGE_APPEND_BULKLOADING:
        config.isUsingBulkLoading = 1
        config.isUsingKeywordIndexBulkLoading = 1
        config.isUsingReverseKeywordIndexBulkLoading = 1
    config.isBuildingExternalReverseKeywordFile = 0
    config.isBuildingDocIdIndex = 1
    config.isSortingPostingFile = 1
    config.isUsingStoredPosting = 0

    for attrName in attributes:
        print "-" * 79
        print "Build text index for class '%s', attribute '%s'" % (className,
                                                                   attrName)
        sortedPostingExist = CheckIfSortedPostingExist(dataFileName, className,
                                                       attrName)
        if sortedPostingExist:
            config.isSortingPostingFile = 0
        else:
            config.isSortingPostingFile = 1
        oosqlSystem.Tool_BuildTextIndex(volumeID, temporaryVolumeID, className,
                                        attrName, config)

    # commit transaction
    print "-" * 79
    print "Transaction commit"
    oosqlSystem.TransCommit()

    # dismount database
    print "-" * 79
    print "Dismount database"
    oosqlSystem.Dismount(temporaryVolumeID)
    oosqlSystem.DismountDB(databaseID)

    # unlink posting file and sorted posting file
    for attrName in attributes:
        postingFileName = "TEXT_%s_%s_Posting" % (className, attrName)
        destName = "%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator,
                               postingFileName)
        try:
            os.unlink(destName)
        except OSError:
            pass
        postingFileName = "%s_TEXT_%s_%s_Posting" % (dataFileName, className,
                                                     attrName)
        postingFileName = string.join(
            string.split(postingFileName, dirSeparator), '_')
        destName = "%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator,
                               postingFileName)
        try:
            os.unlink(destName)
        except OSError:
            pass
        sortedPostingFileName = "TEXT_%s_%s_SortedPosting" % (className,
                                                              attrName)
        sortedPostingFileName = string.join(
            string.split(sortedPostingFileName, dirSeparator), '_')
        destName = "%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator,
                               sortedPostingFileName)
        try:
            os.unlink(destName)
        except OSError:
            pass

        sortedPostingFileName = "%s_TEXT_%s_%s_SortedPosting" % (
            dataFileName, className, attrName)
        sortedPostingFileName = string.join(
            string.split(sortedPostingFileName, dirSeparator), '_')
        destName = "%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator,
                               sortedPostingFileName)
        try:
            os.unlink(destName)
        except OSError:
            pass

    # unlink oid file
    oidFileName = "TEXT_%s_OID" % (className)
    destName = "%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator, oidFileName)
    try:
        os.unlink(destName)
    except OSError:
        pass
コード例 #9
0
def LoaderParallelExtract(argv):
    (databaseName, volumeName, temporaryDatabaseName, temporaryVolumeName,
     dataFileNameList, mergedFileName, divideNumber,
     mergedPostingFlag) = ParseArgument(argv)

    if len(dataFileNameList) == 1:
        dataFileName = dataFileNameList[0]
    else:
        numSourceDataFiles = PyOOSQL.MergeDataInLoadDbFiles(
            dataFileNameList, mergedFileName)
        dataFileName = mergedFileName

    numObjectsInFile = PyOOSQL.CountObjectsInLoadDbFile(dataFileName)
    numObjectsToExtract = numObjectsInFile / divideNumber

    extractorName = '_ExtractKeyword.py'

    startObjectNo = 0
    endObjectNo = numObjectsToExtract - 1

    for i in range(0, divideNumber):

        if i == divideNumber - 1:
            endObjectNo = -1

        arguments = []
        arguments.append(extractorName)  # program
        arguments.append(databaseName)  # database name
        arguments.append(volumeName)  # volume name
        arguments.append('-temporary')  # -temporary
        arguments.append(temporaryDatabaseName)  # temporary database name
        arguments.append(temporaryVolumeName)  # temporary volume name
        arguments.append(dataFileName)  # data file name
        arguments.append(str(startObjectNo))  # start object no
        arguments.append(str(endObjectNo))  # end object no
        arguments.append(str(i))  # process no

        pid = os.fork()
        if not pid:
            # child process execution part
            # execute keyword extractor
            os.execvp(extractorName, arguments)
        else:
            # parent process execution part
            # execute monitoring thread
            thread.start_new_thread(MonitorProcess, (
                i,
                pid,
                extractorName,
                arguments,
                divideNumber,
            ))

            # adjust start and end object no.
            startObjectNo = endObjectNo + 1
            endObjectNo = startObjectNo + numObjectsToExtract - 1

    # execute polling
    while 1:
        # if all child process are exited
        if nExitedProcesses == divideNumber:
            break
        else:
            time.sleep(1)

    # handle -storeMergedPosting argument
    if mergedPostingFlag:
        # merge sorted posting files which are extracted in parallel
        LoaderMergePosting(databaseName, volumeName, temporaryDatabaseName,
                           temporaryVolumeName, dataFileName)
        print "Parallel keyword extraction is done"
    else:
        # nothing to do
        print "Parallel keyword extraction is done"
コード例 #10
0
def LoaderMergePosting(databaseName, volumeName, temporaryDatabaseName,
                       temporaryVolumeName, dataFileName):
    # mount database
    oosqlSystem = PyOOSQL.OOSQL_System()
    databaseID = oosqlSystem.MountDB(databaseName)
    volumeID = oosqlSystem.GetVolumeID(databaseID, volumeName)
    temporaryVolumeID = oosqlSystem.MountVolumeByVolumeName(
        temporaryDatabaseName, temporaryVolumeName)

    # begin transaction
    oosqlSystem.TransBegin(PyOOSQL.X_RR_RR)

    # get className and attribute list in the datafile
    f = open(dataFileName, "r")
    classLine = f.readline()
    f.close()
    # replace '(' and ')' into <space>
    classLine = string.join(string.split(classLine, '('), ' ')
    classLine = string.join(string.split(classLine, ')'), ' ')
    # retrives className and attributesInDatafile
    classLineSplitted = string.split(classLine)
    className = classLineSplitted[1]
    attributesInDatafile = classLineSplitted[2:]

    # get text attributes in the database schema
    (className, classId, attributeInfos,
     indexInfos) = oosqlSystem.GetTableDescription(volumeID, className)
    attributes = []
    for attributeInfo in attributeInfos:
        if attributeInfo[2] == PyOOSQL.OOSQL_TYPE_TEXT:
            if attributeInfo[0] in attributesInDatafile:
                attributes.append(attributeInfo[0])

    # merging
    env_ODYS_TEMP_PATH = os.environ["ODYS_TEMP_PATH"]
    if sys.platform == "win32":
        dirSeparator = "\\"
    else:
        dirSeparator = "/"

    for attrName in attributes:
        print "-" * 79
        print "Merge posting for class '%s', attribute '%s'" % (className,
                                                                attrName)
        # if merged posting already exists, skip merging
        mergedPostingExist = CheckIfMergedPostingExist(dataFileName, className,
                                                       attrName)
        if mergedPostingExist:
            continue

        sortedPostingExist = CheckIfSortedPostingExist(dataFileName, className,
                                                       attrName)
        postingFileNameList = GetPostingFileNameList(dataFileName, className,
                                                     attrName,
                                                     sortedPostingExist)

        if not sortedPostingExist:
            newPostingFileName = "%s_TEXT_%s_%s_Posting" % (
                dataFileName, className, attrName)
            newPostingFileName = string.join(
                string.split(newPostingFileName, dirSeparator), '_')
            # merge divided posting files
            oosqlSystem.Tool_MergePosting(postingFileNameList,
                                          newPostingFileName)
        else:
            newPostingFileName = "%s_TEXT_%s_%s_SortedPosting" % (
                dataFileName, className, attrName)
            newPostingFileName = string.join(
                string.split(newPostingFileName, dirSeparator), '_')
            # merge divided posting files
            oosqlSystem.Tool_MergePosting(postingFileNameList,
                                          newPostingFileName)

    print "-" * 79

    # commit transaction
    oosqlSystem.TransCommit()

    # dismount database
    oosqlSystem.Dismount(temporaryVolumeID)
    oosqlSystem.DismountDB(databaseID)

    # unlink posting file and sorted posting file
    for attrName in attributes:
        sortedPostingExist = CheckIfSortedPostingExist(dataFileName, className,
                                                       attrName)
        postingFileNameList = GetPostingFileNameList(dataFileName, className,
                                                     attrName,
                                                     sortedPostingExist)
        for postingFileName in postingFileNameList:
            destName = "%s%s%s" % (env_ODYS_TEMP_PATH, dirSeparator,
                                   postingFileName)
            try:
                os.unlink(destName)
            except OSError:
                pass