Example #1
0
def handleOnePass(indexpath, workdir, length):
    """Run one n-gram pass over every document listed in an index file.

    Connects to the SQLite database named by
    ``config.getNgramFileName(length)`` inside ``workdir`` and hands each
    sufficiently large document from the index to ``handleOneDocument``.
    Changes are committed once, after the whole index has been processed.

    Args:
        indexpath: Path of the index file; each line is ``title#textpath``.
        workdir: Directory that holds the n-gram database file.
        length: Selects the database file name and is passed through to
            ``handleOneDocument``.

    Raises:
        ValueError: If an index line does not split into exactly two
            ``#``-separated fields.
    """
    print(indexpath, workdir, length)

    filename = config.getNgramFileName(length)
    filepath = workdir + os.sep + filename

    conn = sqlite3.connect(filepath)
    try:
        cur = conn.cursor()

        # The ``with`` block guarantees the index file is closed even when a
        # document fails part-way through the pass.
        with open(indexpath, 'r') as indexfile:
            # Iterate lazily instead of loading the whole index in memory.
            for oneline in indexfile:
                oneline = oneline.rstrip(os.linesep)
                (title, textpath) = oneline.split('#')
                infile = config.getTextDir() + textpath
                infilesize = utils.get_file_length(
                    infile + config.getSegmentPostfix())
                if infilesize < config.getMinimumFileSize():
                    print("Skipping " + title + '#' + textpath)
                    continue

                # process one document
                handleOneDocument(infile, cur, length)

        # Only reached when every document was processed without error.
        conn.commit()
    finally:
        # Always release the database handle; on failure nothing has been
        # committed by this function.
        conn.close()
Example #2
0
def handleOnePass(indexpath, workdir, length):
    """Run one n-gram pass over every document listed in an index file.

    Connects to the SQLite database named by
    ``config.getNgramFileName(length)`` inside ``workdir`` and hands each
    sufficiently large document from the index to ``handleOneDocument``.
    Changes are committed once, after the whole index has been processed.

    Args:
        indexpath: Path of the index file; each line is ``title#textpath``.
        workdir: Directory that holds the n-gram database file.
        length: Selects the database file name and is passed through to
            ``handleOneDocument``.

    Raises:
        ValueError: If an index line does not split into exactly two
            ``#``-separated fields.
    """
    print(indexpath, workdir, length)

    filename = config.getNgramFileName(length)
    filepath = workdir + os.sep + filename

    conn = sqlite3.connect(filepath)
    try:
        cur = conn.cursor()

        # The ``with`` block guarantees the index file is closed even when a
        # document fails part-way through the pass.
        with open(indexpath, 'r') as indexfile:
            # Iterate lazily instead of loading the whole index in memory.
            for oneline in indexfile:
                oneline = oneline.rstrip(os.linesep)
                (title, textpath) = oneline.split('#')
                infile = config.getTextDir() + textpath
                infilesize = utils.get_file_length(
                    infile + config.getSegmentPostfix())
                if infilesize < config.getMinimumFileSize():
                    print("Skipping " + title + '#' + textpath)
                    continue

                # process one document
                handleOneDocument(infile, cur, length)

        # Only reached when every document was processed without error.
        conn.commit()
    finally:
        # Always release the database handle; on failure nothing has been
        # committed by this function.
        conn.close()
Example #3
0
    def add_variation(self, f, im, name, ops):
        """Add a variation to the asset.

        Animated gifs are stored untouched; every other image is copied,
        transformed with ``Variation.transform_image`` and re-encoded into an
        in-memory buffer before being handed to the account's storage
        backend.

        Args:
            f: File object holding the original asset. Only used as-is for
                animated gifs; otherwise it is replaced by the re-encoded
                variation buffer.
            im: The source image (presumably a PIL ``Image`` - confirm
                against callers); ``format``, ``is_animated``, ``copy()``,
                ``mode`` and ``size`` are read from it.
            name: Name of the variation.
            ops: Transform operations passed to
                ``Variation.transform_image``.

        Returns:
            The newly stored ``Variation``.
        """
        from models.accounts import Account

        # Make sure we have access to the associated account frame
        if not isinstance(self.account, Account):
            self.account = Account.one(Q._id == self.account)

        # Transform the original image to generate the variation
        vim = None
        # `format` may be None (e.g. for images not loaded from a file), so
        # normalise it before comparing instead of calling `.lower()` on it.
        is_gif = (im.format or '').lower() == 'gif'
        if is_gif and im.is_animated:
            # By-pass transforms for animated gifs
            fmt = {'ext': 'gif', 'fmt': 'gif'}

        else:
            # Transform the image based on the variation
            vim = im.copy()
            vim, fmt = Variation.transform_image(vim, ops)

            # Prepare the variation file for storage
            f = io.BytesIO()
            vim.save(f, **fmt)
            f.seek(0)

        # Describe whichever image is actually stored: the transformed copy
        # when there is one, otherwise the untouched original.
        stored_im = vim or im

        # Add the variation to the asset
        variation = Variation(name=name,
                              ext=fmt['ext'],
                              meta={
                                  'length': get_file_length(f),
                                  'image': {
                                      'mode': stored_im.mode,
                                      'size': stored_im.size
                                  }
                              })

        # Set a version, regenerating it until it is unused for this name
        variation.version = generate_uid(3)
        while self.get_variation(name, variation.version):
            variation.version = generate_uid(3)

        # Store the variation
        variation.store_key = Variation.get_store_key(self, variation)
        backend = self.account.get_backend_instance()
        backend.store(f, variation.store_key)

        # We use the $push operator to store the variation to prevent race
        # conditions if multiple processes attempt to update the assets
        # variations at the same time.
        # NOTE(review): presumably `get_collection()` returns a PyMongo
        # collection, where `update` is deprecated in favour of
        # `update_one` - confirm before upgrading the driver.
        self.get_collection().update(
            {'_id': self._id}, {'$push': {
                'variations': variation._document
            }})

        return variation
Example #4
0
def handleOneIndex(indexpath, subdir, indexname, fast):
    """Generate candidate models for the documents listed in one index file.

    Documents are fed one by one to ``generateOneText``. Once the accumulated
    size of the processed documents exceeds
    ``config.getCandidateModelSize()`` the current candidate is closed, its
    status file is written and a new candidate is started. Progress is saved
    in the index status file under ``GenerateTextEnd`` / ``GenerateModelEnd``
    so an interrupted run resumes from the last closed candidate.

    Args:
        indexpath: Path of the index file; each line is ``title#textpath``.
        subdir: Sub-directory of ``config.getModelDir()`` for this index.
        indexname: Directory name, below ``subdir``, that receives the
            candidate model files.
        fast: When true, each candidate is built at
            ``config.getInMemoryFileSystem()`` and copied into the model
            directory when it is closed.

    Raises:
        utils.EpochError: If the ``MergeSequence`` epoch check on the index
            status fails.
        ValueError: If an index line does not split into exactly two
            ``#``-separated fields.
    """
    inMemoryFile = "model.db"

    modeldir = os.path.join(config.getModelDir(), subdir, indexname)
    if not os.path.exists(modeldir):
        os.makedirs(modeldir)

    def removeIfExists(path):
        # Delete ``path`` when it is present; a missing file is not an error.
        if os.access(path, os.F_OK):
            os.unlink(path)

    def candidatePath(num):
        # On-disk location of candidate model number ``num``.
        return os.path.join(modeldir, config.getCandidateModelName(num))

    def cleanupInMemoryFile():
        # Remove the in-memory model and its report.
        modelfile = os.path.join(config.getInMemoryFileSystem(), inMemoryFile)
        reportfile = modelfile + config.getReportPostfix()
        removeIfExists(modelfile)
        removeIfExists(reportfile)

    def copyoutInMemoryFile(modelfile):
        # Copy the in-memory model and its report to ``modelfile`` on disk.
        inmemoryfile = os.path.join(
            config.getInMemoryFileSystem(), inMemoryFile)
        inmemoryreportfile = inmemoryfile + config.getReportPostfix()
        reportfile = modelfile + config.getReportPostfix()

        if os.access(inmemoryfile, os.F_OK):
            utils.copyfile(inmemoryfile, modelfile)
        if os.access(inmemoryreportfile, os.F_OK):
            utils.copyfile(inmemoryreportfile, reportfile)

    def cleanupFiles(modelnum):
        # Remove a candidate model and its report left by an earlier run.
        modelfile = candidatePath(modelnum)
        reportfile = modelfile + config.getReportPostfix()
        removeIfExists(modelfile)
        removeIfExists(reportfile)

    def storeModelStatus(modelfile, textnum, nexttextnum):
        # Record in the model's status file which index lines it covers.
        modelstatuspath = modelfile + config.getStatusPostfix()
        modelstatus = {}
        modelstatus['GenerateStart'] = textnum
        modelstatus['GenerateEnd'] = nexttextnum
        utils.sign_epoch(modelstatus, 'Generate')
        utils.store_status(modelstatuspath, modelstatus)

    print(indexpath, subdir, indexname)

    indexstatuspath = indexpath + config.getStatusPostfix()
    indexstatus = utils.load_status(indexstatuspath)
    if not utils.check_epoch(indexstatus, 'MergeSequence'):
        raise utils.EpochError('Please mergeseq first.\n')
    if utils.check_epoch(indexstatus, 'Generate'):
        return

    # Resume from the progress saved by a previous, interrupted run.
    textnum, modelnum, aggmodelsize = 0, 0, 0
    if 'GenerateTextEnd' in indexstatus:
        textnum = indexstatus['GenerateTextEnd']
    if 'GenerateModelEnd' in indexstatus:
        modelnum = indexstatus['GenerateModelEnd']

    # Clean up whatever the unfinished candidate left behind.
    if fast:
        cleanupInMemoryFile()

    cleanupFiles(modelnum)

    # Number of index lines seen so far. Tracked explicitly so the code after
    # the loop never depends on the loop variable, which does not exist when
    # the index file is empty.
    linecount = 0

    # begin processing
    with open(indexpath, 'r') as indexfile:
        for i, oneline in enumerate(indexfile):
            linecount = i + 1

            # Skip lines already covered by closed candidates.
            if i < textnum:
                continue

            # remove trailing '\n'
            oneline = oneline.rstrip(os.linesep)
            (title, textpath) = oneline.split('#')
            infile = config.getTextDir() + textpath
            infilesize = utils.get_file_length(
                infile + config.getMergedPostfix())
            if infilesize < config.getMinimumFileSize():
                print("Skipping " + title + '#' + textpath)
                continue

            if fast:
                modelfile = os.path.join(
                    config.getInMemoryFileSystem(), inMemoryFile)
            else:
                modelfile = candidatePath(modelnum)

            reportfile = modelfile + config.getReportPostfix()
            print("Proccessing " + title + '#' + textpath)
            if generateOneText(infile, modelfile, reportfile):
                aggmodelsize += infilesize
            print("Processed " + title + '#' + textpath)
            if aggmodelsize > config.getCandidateModelSize():
                # copy out in memory file
                if fast:
                    modelfile = candidatePath(modelnum)
                    copyoutInMemoryFile(modelfile)
                    cleanupInMemoryFile()

                # the model file is on disk now
                nexttextnum = i + 1
                storeModelStatus(modelfile, textnum, nexttextnum)

                # new model candidate
                aggmodelsize = 0
                textnum = nexttextnum
                modelnum += 1

                # clean up next file
                cleanupFiles(modelnum)

                # save current progress in status file
                indexstatus['GenerateTextEnd'] = nexttextnum
                indexstatus['GenerateModelEnd'] = modelnum
                utils.store_status(indexstatuspath, indexstatus)
    # end processing

    # Close the last (possibly partial) candidate. Its path is always derived
    # from the current ``modelnum``: reusing the path left over from the loop
    # would be undefined when no document was processed, and would point at
    # the previous, already closed candidate when the last processed document
    # triggered a roll-over.
    modelfile = candidatePath(modelnum)
    if fast:
        copyoutInMemoryFile(modelfile)
        cleanupInMemoryFile()

    # the model file is on disk now
    nexttextnum = linecount
    storeModelStatus(modelfile, textnum, nexttextnum)

    # save current progress in status file
    modelnum += 1
    indexstatus['GenerateTextEnd'] = nexttextnum
    indexstatus['GenerateModelEnd'] = modelnum

    utils.sign_epoch(indexstatus, 'Generate')
    utils.store_status(indexstatuspath, indexstatus)
Example #5
0
def upload():
    """Upload an asset.

    Reads the uploaded file from the ``asset`` field of the request,
    validates the remaining parameters with ``UploadForm``, stores the file
    through the account's backend and inserts a matching ``Asset`` record.

    Returns:
        The result of ``fail(...)`` when no file was sent, the form is
        invalid or an image cannot be read; otherwise the result of
        ``success(...)`` with the asset's JSON representation.
    """

    # Check a file has been provided
    fs = request.files.get('asset')
    if not fs:
        return fail('No `asset` sent.')

    # Validate the parameters
    form = UploadForm(request.values)
    if not form.validate():
        return fail('Invalid request', issues=form.errors)

    form_data = form.data

    # Name: fall back to the uploaded file's base name when none was given
    name = form_data['name']
    if not name:
        name = os.path.splitext(fs.filename)[0]
    name = slugify_name(name)

    # Extension (lower-cased, without the leading dot)
    ext = os.path.splitext(fs.filename)[1].lower()[1:]

    # If there's no extension associated with the file then see if we can
    # guess it using the imghdr module
    if not ext:
        fs.stream.seek(0)
        ext = imghdr.what(fs.filename, fs.stream.read()) or ''
        # Sniffing consumed the stream; rewind it so later consumers start
        # from the beginning of the file again.
        fs.stream.seek(0)

    # If the file is a recognized image format then attempt to read it as an
    # image otherwise leave it as a file.
    asset_file = fs.stream
    asset_meta = {}
    asset_type = Asset.get_type(ext)
    # Compare by value: identity checks against a string literal depend on
    # interning and are not reliable.
    if asset_type == 'image':
        try:
            asset_file, asset_meta = prep_image(asset_file)
        except IOError:
            return fail('File appears to be an image but it cannot be read.')

    # Add basic file information to the asset meta
    asset_meta.update({
        'filename': fs.filename,
        'length': get_file_length(asset_file)
    })

    # Create the asset
    asset = Asset(account=g.account._id,
                  name=name,
                  ext=ext,
                  meta=asset_meta,
                  type=asset_type,
                  variations=[])

    if form_data['expires']:
        asset.expires = form_data['expires']

    # Generate a unique Id for the asset, retrying while the account
    # already has an asset with the same uid
    asset.uid = generate_uid(6)
    while Asset.count(And(Q.account == g.account, Q.uid == asset.uid)) > 0:
        asset.uid = generate_uid(6)

    # Store the original file
    asset.store_key = Asset.get_store_key(asset)
    backend = g.account.get_backend_instance()
    backend.store(asset_file, asset.store_key)

    # Save the asset
    asset.insert()

    return success(asset.to_json_type())
Example #6
0
    else:
        #backup merged model
        utils.copyfile(mergedmodel, prunedmodel)
        pruneModel(prunedmodel, args.k, args.CDF)

    #validate pruned model
    print('validating')
    validateModel(prunedmodel)

    #export textual format
    print('exporting')
    exportfile = os.path.join(trydir, 'kmm_pruned.text')
    exportModel(prunedmodel, exportfile)

    #convert to interpolation
    print('converting')
    kmm_model = exportfile
    inter_model = os.path.join(trydir, config.getFinalModelFileName())
    convertModel(kmm_model, inter_model)

    modelsize = utils.get_file_length(inter_model)
    cwdstatus['PruneModelSize'] = modelsize
    utils.store_status(cwdstatuspath, cwdstatus)

    print('final model size:', modelsize)

    #sign status epoch
    utils.sign_epoch(cwdstatus, 'Prune')
    utils.store_status(cwdstatuspath, cwdstatus)
    print('done')
Example #7
0
    else:
        #backup merged model
        utils.copyfile(mergedmodel, prunedmodel)
        pruneModel(prunedmodel, args.k, args.CDF)

    #validate pruned model
    print('validating')
    validateModel(prunedmodel)

    #export textual format
    print('exporting')
    exportfile = os.path.join(trydir, 'kmm_pruned.text')
    exportModel(prunedmodel, exportfile)

    #convert to interpolation
    print('converting')
    kmm_model = exportfile
    inter_model = os.path.join(trydir, config.getFinalModelFileName())
    convertModel(kmm_model, inter_model)

    modelsize = utils.get_file_length(inter_model)
    cwdstatus['PruneModelSize'] = modelsize
    utils.store_status(cwdstatuspath, cwdstatus)

    print('final model size:', modelsize)

    #sign status epoch
    utils.sign_epoch(cwdstatus, 'Prune')
    utils.store_status(cwdstatuspath, cwdstatus)
    print('done')