Exemple #1
0
def pdfrw_object(fn):
    '''Open the PDF at ``fn`` with PdfReader (hachoir doesn't do pdf).

    On win32 the file is first copied into ``CACHEDIR/Workaround`` (using
    ``auto_increment_fn`` to pick another name when one is already taken)
    and the copy is read instead of the original.

    If PdfReader raises, the traceback is written to ``SYSDIR/dbgop``
    (overwriting any previous content) and None is returned.
    '''
    if sys.platform == 'win32':
        newfn = os.path.join(CACHEDIR, 'Workaround', os.path.basename(fn))
        if os.path.isfile(newfn):
            newfn = auto_increment_fn(newfn)
        shutil.copyfile(fn, newfn)
        fn = newfn
    # NOTE(review): fakeobj and reader are not used in the visible part of
    # this function -- the snippet looks truncated; confirm against the
    # full source before removing them.
    fakeobj = get_fake_metadata_object(fn)
    try:
        reader = PdfReader(fn)
    except Exception:
        # Use a context manager so the debug file is flushed and closed
        # instead of leaking the handle.
        with open(os.path.join(SYSDIR, 'dbgop'), 'w') as dbg_file:
            traceback.print_exc(file=dbg_file)
        return None
Exemple #2
0
def get_real_metadata_object(fn):
    '''Build a metadata object from the file's own embedded metadata.

    Starts from ``get_fake_metadata_object(fn)`` and, when
    ``get_real_metadata(fn)`` yields a non-empty result, overwrites the
    author, title, date, created/modified dates, file type and file name.
    When the result is empty the object is returned unchanged.
    '''
    #TODO: workaround for file locking bug
    if sys.platform == 'win32':
        base_name = os.path.basename(fn)
        cached_copy = os.path.join(CACHEDIR, 'Workaround', base_name)
        if os.path.isfile(cached_copy):
            cached_copy = auto_increment_fn(cached_copy)
        shutil.copyfile(fn, cached_copy)
        fn = cached_copy

    meta_obj = get_fake_metadata_object(fn)
    meta = get_real_metadata(fn)
    if not meta:
        return meta_obj

    # (attribute on the object, key in the metadata mapping), applied in order.
    field_sources = (
        ('author', 'author'),
        ('title', 'title'),
        ('date', 'creation_date'),
        ('FileData_DateCreated', 'creation_date'),
        ('FileData_DateModified', 'modification_date'),
    )
    for attr_name, meta_key in field_sources:
        setattr(meta_obj, attr_name, meta[meta_key])

    meta_obj.FileData_FileType = determine_file_type(fn)
    meta_obj.FileData_FileName = os.path.basename(fn)
    return meta_obj
Exemple #3
0
def get_odf_metadata_object(fn):
    '''Return a metadata object for the ODF document at ``fn``.

    Starts from ``get_fake_metadata_object(fn)`` and fills it in from the
    mapping returned by ``get_odf_metadata(fn)`` (keys read: 'author',
    'title', 'creation_date', 'modification_date', 'description').

    If parsing raises, the traceback and a failure message are printed and
    the object from ``get_fake_metadata_object`` is returned as-is.
    '''
    # The win32 copy-to-cache workaround used by get_real_metadata_object
    # is disabled for this function.
    obj = get_fake_metadata_object(fn)
    try:
        d = get_odf_metadata(fn)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate.
        traceback.print_exc()
        print('Parsing odf document %s failed' % fn)
        return obj
    obj.author = d['author']
    obj.title = d['title']
    obj.date = d['creation_date']
    obj.FileData_DateCreated = d['creation_date']
    obj.FileData_DateModified = d['modification_date']
    # Only set the annotation when the document has a non-empty description.
    if d['description']:
        obj.BibData_Annote = d['description']
    obj.FileData_FileType = 'odf_doc'
    obj.FileData_FileName = os.path.basename(fn)
    return obj
Exemple #4
0
def get_oxml_metadata_object(fn):
    '''Return a metadata object for the OOXML document at ``fn``.

    Starts from ``get_fake_metadata_object(fn)`` and fills it in from the
    mapping returned by ``get_oxml_metadata(fn)`` (keys read: 'author',
    'title', 'creation_date', 'modification_date', 'description').

    If parsing raises, the traceback and a failure message are printed and
    the object from ``get_fake_metadata_object`` is returned as-is.
    '''
    # The win32 copy-to-cache workaround used by get_real_metadata_object
    # is disabled for this function.
    obj = get_fake_metadata_object(fn)
    try:
        d = get_oxml_metadata(fn)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate.
        traceback.print_exc()
        print('Parsing oxml document %s failed' % fn)
        return obj
    obj.author = d['author']
    obj.title = d['title']
    obj.date = d['creation_date']
    obj.FileData_DateCreated = d['creation_date']
    obj.FileData_DateModified = d['modification_date']
    # Only set the annotation when the document has a non-empty description.
    if d['description']:
        obj.BibData_Annote = d['description']
    obj.FileData_FileType = 'oxml_doc'
    obj.FileData_FileName = os.path.basename(fn)
    return obj