コード例 #1
0
        # Walk every file attached to the current item; each office document
        # is downloaded, converted to HTML and the HTML is posted back to the
        # same item.
        for f in omeka_client.get_files_for_item(item['id']):
            fname = f['original_filename']
            name, ext = os.path.splitext(fname)

            # Only word-processor formats are converted; matching is
            # case-insensitive on the extension.
            if ext.lower() in [".docx", ".doc", ".odt", ".rtf"]:
                num_docs_found += 1
                res, data = omeka_client.get_file(f['file_urls']['original'])
                download_file = os.path.join(temp_dir, fname)
                # The context manager closes the handle even if the write
                # raises, so the descriptor is never leaked.
                with open(download_file, 'wb') as out:
                    out.write(data)
                logger.info("Converting office doc file %s to HTML", f['id'])
                html_file = os.path.join(temp_dir, name + ".html")
                # NOTE(review): the three positional flags are passed through
                # unchanged; their meaning is defined by word2html.convert.
                word2html.convert(download_file, html_file, True, True, False)

                # Count an upload only when the client returns a truthy
                # result, so a failed post shows up as a count mismatch.
                if omeka_client.post_file_from_filename(html_file, item['id']):
                    num_html_uploaded += 1
                    logger.info("Uploaded  %s successfully", f['id'])
                
        
# End-of-run report: a banner, the three counters, then a consistency check
# comparing documents found against HTML files uploaded.
for summary_args in (
    ("********************",),
    ("SUMMARY:",),
    ("Deleted %s HTML", num_html_deleted),
    ("Docs found: %s", num_docs_found),
    ("HTML files converted and added: %s", num_html_uploaded),
):
    logger.info(*summary_args)

if num_docs_found != num_html_uploaded:
    logger.error("Number of docs does not match number of HTML files uploaded")
else:
    logger.info("No errors detected")
コード例 #2
0
                    element_texts.append(element_text)
                    

            # Add the file path as the text of the element identified by
            # title_id, then build the JSON payload for the item.
            element_texts.append({"html": False, "text": file_path, "element": {"id": title_id}})
            item_to_upload = {"collection": {"id": collection_id}, "item_type": {"id": item_type_id}, "public": args["public"]}
            item_to_upload["element_texts"] = element_texts
            jsonstr = json.dumps(item_to_upload)

            # ID recorded for this path by an earlier upload, or None.
            previous_id = id_map.get(file_path)

            if previous_id is not None:
                # Single-argument print(...) emits the same text under the
                # Python 2 print statement and the Python 3 print function.
                print("Re-uploading  %s" % (previous_id,))
                response, content = omeka_client.put("items", previous_id, jsonstr)
                # A 404 means the recorded item no longer exists on the
                # server; forget the ID and fall through to creating it.
                if response['status'] == '404':
                    previous_id = None

            if previous_id is None:
                response, content = omeka_client.post("items", jsonstr)
            print(content)
            new_item = json.loads(content)
            new_item_id = new_item['id']
            print("Item ID %s" % (new_item_id,))
            id_map[file_path] = new_item_id
            # Save the ID map after every item so progress survives an
            # interrupted run. TODO: make this an option.
            with open(file_stash, 'w') as outfile:
                json.dump(id_map, outfile)
            print(omeka_client.post_file_from_filename(file_path, new_item_id))

            
# Write the path -> item ID map to the stash file as JSON.
with open(file_stash, 'w') as stash_handle:
    json.dump(id_map, stash_handle)
コード例 #3
0
    # Conversion runs unless the 'do_not_convert' argument is set.
    if not args['do_not_convert']:
        # Each office document attached to the item is downloaded, converted
        # to HTML and the HTML is posted back to the same item.
        for f in omeka_client.get_files_for_item(item['id']):
            fname = f['original_filename']
            name, ext = os.path.splitext(fname)

            # Only word-processor formats are converted; matching is
            # case-insensitive on the extension.
            if ext.lower() in [".docx", ".doc", ".odt", ".rtf"]:
                num_docs_found += 1
                res, data = omeka_client.get_file(f['file_urls']['original'])
                download_file = os.path.join(temp_dir, fname)
                # The context manager closes the handle even if the write
                # raises, so the descriptor is never leaked.
                with open(download_file, 'wb') as out:
                    out.write(data)
                logger.info("Converting office doc file %s to HTML", f['id'])
                html_file = os.path.join(temp_dir, name + ".html")
                # NOTE(review): the three positional flags are passed through
                # unchanged; their meaning is defined by word2html.convert.
                word2html.convert(download_file, html_file, True, True, False)

                # Count an upload only when the client returns a truthy
                # result, so a failed post shows up as a count mismatch.
                if omeka_client.post_file_from_filename(html_file, item['id']):
                    num_html_uploaded += 1
                    logger.info("Uploaded  %s successfully", f['id'])

# End-of-run report: a banner, the three counters, then a consistency check
# comparing documents found against HTML files uploaded.
for summary_args in (
    ("********************",),
    ("SUMMARY:",),
    ("Deleted %s HTML", num_html_deleted),
    ("Docs found: %s", num_docs_found),
    ("HTML files converted and added: %s", num_html_uploaded),
):
    logger.info(*summary_args)

if num_docs_found != num_html_uploaded:
    logger.error("Number of docs does not match number of HTML files uploaded")
else:
    logger.info("No errors detected")