def process_gdocs_resource(save_dir, gdocs_resource_id, gdocs_access_token=None):
    """Fetch a Google Docs document, convert it to CNXML and save it.

    Downloads the document identified by gdocs_resource_id (optionally
    authenticating with gdocs_access_token), transforms its HTML export to
    CNXML, cleans it, saves it (plus any downloaded images) into save_dir,
    and validates the result.

    Returns a (title, filename) tuple; the filename is always the literal
    "Google Document".  Old comment states that returning this filename
    might kill the ability to do multiple tabs in parallel, unless it gets
    offloaded onto the form again.
    """
    # Authorized client for talking to the Google Docs API.
    client = getAuthorizedGoogleDocsClient()

    # Wrap the raw access-token string in an AuthSub token when present;
    # anonymous access otherwise.
    if gdocs_access_token:
        token = gdata.gauth.AuthSubToken(gdocs_access_token)
    else:
        token = None

    # Look up the document entry, then pull its HTML content.
    entry = client.GetDoc(gdocs_resource_id, None, token)
    html = client.get_file_content(entry.content.src, token)

    # HTML -> CNXML (downloading referenced images), then clean, save
    # and validate.
    cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
    cnxml = clean_cnxml(cnxml)
    save_cnxml(save_dir, cnxml, objects.items())
    validate_cnxml(cnxml)

    return (entry.title.text, "Google Document")
def test_latex(self): latex_files = os.listdir(test_folder_name + 'latex/') i = 0 while (i < len(latex_files)): f = latex_files[i] filename, extension = os.path.splitext(f) if (extension != ''): latex_files.remove(f) else: i = i + 1 for f in latex_files: original_filename = test_folder_name + 'latex/' + f filename, extension = os.path.splitext(original_filename) valid_filename = filename + '.cnxml' output_filename = filename + '.tmp' diff_filename = filename + '.diff' err_filename = filename + '.err' fp = open(original_filename, 'r') latex_archive = fp.read() # LaTeX 2 CNXML transformation cnxml, objects = latex_to_cnxml(latex_archive, original_filename) cnxml = clean_cnxml(cnxml) save_cnxml(save_dir, cnxml, objects.items()) validate_cnxml(cnxml) fp.close() output = open(output_filename, 'w') output.write(cnxml) output.close() remove_ids(output_filename) process = subprocess.Popen( ['diff', valid_filename, output_filename], shell=False, stdout=subprocess.PIPE) std_output = process.communicate() if (std_output[0] != None and len(std_output[0]) != 0): diff_output = open(diff_filename, 'w') diff_output.write(std_output[0]) diff_output.close() print( 'Differences in the testing of ' + f + ', information on those differences has been placed in ' + diff_filename) elif (std_output[1] != None and len(std_output[1]) != 0): err_output = open(err_filename, 'w') err_output.write(std_output[1]) err_output.close() print( 'Error(s) occurred while attempting to test for differences in CNXML output of ' + f + ', information on these errors are in ' + err_filename)
def test_url(self): url_files=os.listdir(test_folder_name+'url/') i=0 while(i < len(url_files)): f=url_files[i] filename, extension = os.path.splitext(f) if(extension != ''): url_files.remove(f) else: i=i+1 for f in url_files: input_file=open(test_folder_name+'url/'+f,'r') url=input_file.readline() input_file.close() output_filename=test_folder_name+'url/'+f+'.cnxml' valid_filename=test_folder_name+'url/'+f+'.cnxml' output_filename=test_folder_name+'url/'+f+'.tmp' diff_filename = test_folder_name+'url/'+f+'.diff' err_filename = test_folder_name+'url/'+f+'.err' import_opener = urllib2.build_opener() import_opener.addheaders = [('User-agent', 'Mozilla/5.0')] try: import_request = import_opener.open(url) html = import_request.read() # transformation cnxml, objects, html_title = htmlsoup_to_cnxml( html, bDownloadImages=True, base_or_source_url=url) cnxml = clean_cnxml(cnxml) validate_cnxml(cnxml) output=open(output_filename,'w') output.write(cnxml) output.close() remove_ids(output_filename) process = subprocess.Popen(['diff',valid_filename,output_filename], shell=False, stdout=subprocess.PIPE) std_output = process.communicate() if(std_output[0] != None and len(std_output[0]) != 0): diff_output=open(diff_filename,'w') diff_output.write(std_output[0]) diff_output.close() print('Differences in the testing of '+f+', information on those differences has been placed in '+diff_filename) elif(std_output[1] != None and len(std_output[1]) != 0): err_output=open(err_filename,'w') err_output.write(std_output[1]) err_output.close() print('Error(s) occurred while attempting to test for differences in CNXML output of '+f+', information on these errors are in '+err_filename) except urllib2.URLError, e: print('URL '+url+' could not be opened') quit()
def test_odt(self): odt_files = os.listdir(test_folder_name + 'odt/') i = 0 # Find only .odt files in the testing folder for odt while (i < len(odt_files)): f = odt_files[i] filename, extension = os.path.splitext(f) if (extension != '.odt'): odt_files.remove(f) else: i = i + 1 for f in odt_files: original_filename = test_folder_name + 'odt/' + f filename, extension = os.path.splitext(original_filename) valid_filename = filename + '.cnxml' output_filename = filename + '.tmp' odt_filename = original_filename diff_filename = filename + '.diff' err_filename = filename + '.err' try: open(valid_filename, 'r') except IOError as e: print('Missing valid file (' + valid_filename + ') for testing ' + original_filename) return tree, files, errors = transform(odt_filename) cnxml = clean_cnxml(etree.tostring(tree)) validate_cnxml(cnxml) output = open(output_filename, 'w') output.write(cnxml) output.close() remove_ids(output_filename) process = subprocess.Popen( ['diff', valid_filename, output_filename], shell=False, stdout=subprocess.PIPE) std_output = process.communicate() if (std_output[0] != None and len(std_output[0]) != 0): diff_output = open(diff_filename, 'w') diff_output.write(std_output[0]) diff_output.close() print( 'Differences in the testing of ' + original_filename + ', information on those differences has been placed in ' + diff_filename) elif (std_output[1] != None and len(std_output[1]) != 0): err_output = open(err_filename, 'w') err_output.write(std_output[1]) err_output.close() print( 'Error(s) occurred while attempting to test for differences in CNXML output of ' + original_filename + ', information on these errors are in ' + err_filename)
def cnxml_view(request):
    """Pyramid view: show and save the editable CNXML of the current upload.

    GET (or invalid form): read index.cnxml from the session's transform
    directory, clean it and render it in the CodeMirror editor template.
    POST with a valid 'cnxml' field: save the edited CNXML together with
    the auxiliary files from upload.zip, validate, then redirect to the
    preview page.  Raises HTTPNotFound when no index.cnxml exists yet.
    """
    check_login(request)
    form = Form(request, schema=CnxmlSchema)
    # Per-session working directory created during the original upload.
    save_dir = os.path.join(request.registry.settings['transform_dir'], request.session['upload_dir'])
    cnxml_filename = os.path.join(save_dir, 'index.cnxml')
    transformerror = request.session.get('transformerror')

    # Check for successful form completion
    if 'cnxml' in request.POST and form.validate():
        cnxml = form.data['cnxml']
        # Keep sure we use the standard python ascii string and encode Unicode to xml character mappings
        # (Python 2: form data may arrive as a unicode object.)
        if isinstance(cnxml, unicode):
            cnxml = cnxml.encode('ascii', 'xmlcharrefreplace')

        # get the list of files from upload.zip if it exists
        # NOTE(review): the result of this whole block is discarded --
        # 'files' is reassigned from get_files_from_zipfile() in the try
        # below, so this loop appears to be dead code; confirm and remove.
        files = []
        zip_filename = os.path.join(save_dir, 'upload.zip')
        if os.path.exists(zip_filename):
            zip_archive = zipfile.ZipFile(zip_filename, 'r')
            for filename in zip_archive.namelist():
                # index.cnxml is replaced by the freshly edited content.
                if filename == 'index.cnxml':
                    continue
                fp = zip_archive.open(filename, 'r')
                files.append((filename, fp.read()))
                fp.close()

        try:
            files = get_files_from_zipfile(os.path.join(save_dir, 'upload.zip'))
            # Save first, then validate: a validation failure must not
            # lose the user's edits.  # NOTE(review): presumed intent.
            save_cnxml(save_dir, cnxml, files)
            validate_cnxml(cnxml)
        except ConversionError as e:
            return render_conversionerror(request, e.msg)

        # Return to preview
        return HTTPFound(location=request.route_url('preview'), request=request)

    # Read CNXML
    try:
        with open(cnxml_filename, 'rt') as fp:
            cnxml = fp.read()
    except IOError:
        raise HTTPNotFound('index.cnxml not found')

    # Clean CNXML
    cnxml = clean_cnxml(cnxml)
    # Decode to a unicode object for the template (Python 2 semantics).
    cnxml = cnxml.decode('utf-8')
    cnxml = unicode(cnxml)

    return {
        'codemirror': True,
        'form': FormRenderer(form),
        'cnxml': cnxml,
        'transformerror': transformerror,
    }
def test_doc(self): doc_files=os.listdir(test_folder_name+'doc/') i=0 # Find only .odt files in the testing folder for odt while(i < len(doc_files)): f=doc_files[i] filename, extension = os.path.splitext(f) if(extension != '.doc'): doc_files.remove(f) else: i=i+1 for f in doc_files: original_filename=test_folder_name+'doc/'+f filename, extension = os.path.splitext(original_filename) valid_filename=filename+'.cnxml' output_filename=filename+'.tmp' doc_filename = original_filename diff_filename = filename+'.diff' err_filename = filename+'.err' odt_filename= filename+'.odt' command = '/usr/bin/soffice --headless --nologo --nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + os.getcwd()+'/'+original_filename + ',' + os.getcwd()+'/'+odt_filename + ')"' os.system(command) try: open(valid_filename, 'r') except IOError as e: print('Missing valid file ('+valid_filename+') for testing '+original_filename) return tree, files, errors = transform(odt_filename) cnxml = clean_cnxml(etree.tostring(tree)) validate_cnxml(cnxml) output=open(output_filename,'w') output.write(cnxml) output.close() remove_ids(output_filename) process = subprocess.Popen(['diff',valid_filename,output_filename], shell=False, stdout=subprocess.PIPE) std_output = process.communicate() if(std_output[0] != None and len(std_output[0]) != 0): diff_output=open(diff_filename,'w') diff_output.write(std_output[0]) diff_output.close() print('Differences in the testing of '+original_filename+', information on those differences has been placed in '+diff_filename) elif(std_output[1] != None and len(std_output[1]) != 0): err_output=open(err_filename,'w') err_output.write(std_output[1]) err_output.close() print('Error(s) occurred while attempting to test for differences in CNXML output of '+original_filename+', information on these errors are in '+err_filename)
def test_latex(self): latex_files=os.listdir(test_folder_name+'latex/') i=0 while(i < len(latex_files)): f=latex_files[i] filename, extension = os.path.splitext(f) if(extension != ''): latex_files.remove(f) else: i=i+1 for f in latex_files: original_filename=test_folder_name+'latex/'+f filename, extension = os.path.splitext(original_filename) valid_filename=filename+'.cnxml' output_filename=filename+'.tmp' diff_filename = filename+'.diff' err_filename = filename+'.err' fp=open(original_filename, 'r') latex_archive = fp.read() # LaTeX 2 CNXML transformation cnxml, objects = latex_to_cnxml(latex_archive, original_filename) cnxml = clean_cnxml(cnxml) save_cnxml(save_dir, cnxml, objects.items()) validate_cnxml(cnxml) fp.close() output=open(output_filename,'w') output.write(cnxml) output.close() remove_ids(output_filename) process = subprocess.Popen(['diff',valid_filename,output_filename], shell=False, stdout=subprocess.PIPE) std_output = process.communicate() if(std_output[0] != None and len(std_output[0]) != 0): diff_output=open(diff_filename,'w') diff_output.write(std_output[0]) diff_output.close() print('Differences in the testing of '+f+', information on those differences has been placed in '+diff_filename) elif(std_output[1] != None and len(std_output[1]) != 0): err_output=open(err_filename,'w') err_output.write(std_output[1]) err_output.close() print('Error(s) occurred while attempting to test for differences in CNXML output of '+f+', information on these errors are in '+err_filename)
def test_odt(self): odt_files=os.listdir(test_folder_name+'odt/') i=0 # Find only .odt files in the testing folder for odt while(i < len(odt_files)): f=odt_files[i] filename, extension = os.path.splitext(f) if(extension != '.odt'): odt_files.remove(f) else: i=i+1 for f in odt_files: original_filename=test_folder_name+'odt/'+f filename, extension = os.path.splitext(original_filename) valid_filename=filename+'.cnxml' output_filename=filename+'.tmp' odt_filename = original_filename diff_filename = filename+'.diff' err_filename = filename+'.err' try: open(valid_filename, 'r') except IOError as e: print('Missing valid file ('+valid_filename+') for testing '+original_filename) return tree, files, errors = transform(odt_filename) cnxml = clean_cnxml(etree.tostring(tree)) validate_cnxml(cnxml) output=open(output_filename,'w') output.write(cnxml) output.close() remove_ids(output_filename) process = subprocess.Popen(['diff',valid_filename,output_filename], shell=False, stdout=subprocess.PIPE) std_output = process.communicate() if(std_output[0] != None and len(std_output[0]) != 0): diff_output=open(diff_filename,'w') diff_output.write(std_output[0]) diff_output.close() print('Differences in the testing of '+original_filename+', information on those differences has been placed in '+diff_filename) elif(std_output[1] != None and len(std_output[1]) != 0): err_output=open(err_filename,'w') err_output.write(std_output[1]) err_output.close() print('Error(s) occurred while attempting to test for differences in CNXML output of '+original_filename+', information on these errors are in '+err_filename)
# Command-line entry: regenerate the known-good .cnxml reference file for a
# single test input passed as argv[1] (.odt, .doc or .tex).
filename = sys.argv[1]
name, extension = os.path.splitext(filename)
if (extension == '.odt' or extension == '.doc'):
    if (extension == '.doc'):
        # .doc is first converted to .odt with the external doc2odt tool.
        doc_folder = os.getcwd() + '/' + os.path.dirname(name)
        os.system('./converters/doc2odt -o ' + doc_folder + ' ' + os.getcwd() + '/' + filename)
        # Earlier approach drove a soffice macro directly:
        #command = '/usr/bin/soffice --headless --nologo --nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + os.getcwd()+'/'+filename + ',' + os.getcwd()+'/'+name+'.odt' + ')"'
        #os.system(command)
        # Point the rest of the pipeline at the freshly produced .odt.
        filename = name + '.odt'
    valid_filename = name + '.cnxml'
    # ODT -> CNXML, cleaned, written out as the new reference, ids stripped.
    tree, files, errors = transform(filename)
    cnxml = clean_cnxml(etree.tostring(tree))
    output = open(valid_filename, 'w')
    output.write(cnxml)
    output.close()
    remove_ids(valid_filename)
    if (extension == '.doc'):
        # Drop the temporary .odt produced from the .doc input.
        os.remove(os.getcwd() + '/' + name + '.odt')
elif (extension == '.tex'):
    valid_filename = name + '.cnxml'
    fp = open(filename, 'r')
    latex_archive = fp.read()
    fp.close()
    # LaTeX 2 CNXML transformation
    cnxml, objects = latex_to_cnxml(latex_archive, filename)
    # NOTE(review): this chunk appears truncated -- cnxml is computed for
    # the .tex case but the write-out to valid_filename is not visible
    # here; confirm against the full script.
    def test_gdocs(self):
        """Round-trip test for the Google Docs import pipeline.

        Uploads every .doc in the doc/ test folder to Google Docs,
        optionally adds document ids listed in test_files/gdocs/test_files,
        then downloads each document's HTML export, converts it to CNXML
        and diffs the result against the stored .cnxml reference.

        NOTE(review): depends on network access and Google Docs
        credentials (via upload_doc / get_gdoc); not a hermetic test.
        """
        # Detect the optional list of pre-existing gdocs URLs to test.
        have_test_file = False
        try:
            fp = open('./test_files/gdocs/test_files')
            fp.close()
            have_test_file = True
        except:
            # NOTE(review): bare except also hides non-IO errors.
            print('No gdocs test file')
        doc_files = os.listdir(test_folder_name + 'doc/')
        rids = []  # gdocs resource ids, formatted as 'document:<id>'
        # Filter the listing down to .doc files (in-place, index-driven
        # so removal does not skip entries).
        i = 0
        while (i < len(doc_files)):
            f = doc_files[i]
            filename, extension = os.path.splitext(f)
            if (extension != '.doc'):
                doc_files.remove(f)
            else:
                i = i + 1
        # Upload each local .doc; collect the resource id of each upload.
        for d in doc_files:
            try:
                just_filename = os.path.basename(d)
                just_filename, extension = os.path.splitext(just_filename)
                rid = upload_doc(test_folder_name + 'doc/' + d, 'application/msword', just_filename)
                rids.append(rid)
            except KeyboardInterrupt:
                exit()
            except:
                # NOTE(review): bare except; upload failures are only
                # reported, not recorded.
                print('Error uploading ' + just_filename + ' to gdocs')
        # Add document ids parsed from the optional URL list ('#' lines
        # are comments).
        if (have_test_file):
            fp = open('./test_files/gdocs/test_files')
            for url in fp:
                if (url[0] == '#'):
                    continue
                match_doc_id = re.match(r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
                if match_doc_id:
                    rids.append('document:' + match_doc_id.group(1))
            fp.close()
        count = 0
        for rid in rids:
            # The first len(doc_files) rids correspond positionally to the
            # uploaded local files; later ones came from the URL list and
            # are named after the bare document id.
            if (count < len(doc_files)):
                filename = os.path.basename(doc_files[count])
                filename, ext = os.path.splitext(filename)
            else:
                filename = rid[9:]  # strip the 'document:' prefix
            valid_filename = './test_files/gdocs/' + filename + '.cnxml'
            output_filename = './test_files/gdocs/' + filename + '.tmp'
            diff_filename = './test_files/gdocs/' + filename + '.diff'
            err_filename = './test_files/gdocs/' + filename + '.err'
            # Download the document's HTML export into the gdocs folder.
            gdoc_url = construct_url(rid[9:])
            rid, original_title = get_gdoc(gdoc_url, './test_files/gdocs')
            html_filename = './test_files/gdocs/' + rid[9:] + '.htm'
            html_file = open(html_filename, 'r')
            try:
                html = html_file.read()
                html_file.flush()  # NOTE(review): flush on a read handle is a no-op
            finally:
                html_file.close()
            # HTML -> CNXML, clean, validate, write the comparable output.
            cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
            cnxml = clean_cnxml(cnxml)
            validate_cnxml(cnxml)
            output = open(output_filename, 'w')
            output.write(cnxml)
            output.close()
            remove_ids(output_filename)
            # The downloaded HTML is temporary.
            os.remove('./test_files/gdocs/' + rid[9:] + '.htm')
            process = subprocess.Popen(['diff', valid_filename, output_filename], shell=False, stdout=subprocess.PIPE)
            std_output = process.communicate()
            if (std_output[0] != None and len(std_output[0]) != 0):
                diff_output = open(diff_filename, 'w')
                diff_output.write(std_output[0])
                diff_output.close()
                print('Differences in the testing of gdoc ' + filename + ', information on those differences has been placed in ' + diff_filename)
            elif (std_output[1] != None and len(std_output[1]) != 0):
                err_output = open(err_filename, 'w')
                err_output.write(std_output[1])
                err_output.close()
                print('Error(s) occurred while attempting to test for differences in CNXML output of gdoc ' + filename + ', information on these errors are in ' + err_filename)
            count = count + 1
def choose_view(request): check_login(request) templatePath = 'templates/choose.pt' form = Form(request, schema=UploadSchema) field_list = [('upload', 'File')] # clear the session if 'transformerror' in request.session: del request.session['transformerror'] if 'title' in request.session: del request.session['title'] # Check for successful form completion if form.validate(): try: # Catch-all exception block # Create a directory to do the conversions now_string = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') # TODO: This has a good chance of being unique, but even so... temp_dir_name = '%s-%s' % (request.session['username'], now_string) save_dir = os.path.join( request.registry.settings['transform_dir'], temp_dir_name ) os.mkdir(save_dir) # Keep the info we need for next uploads. Note that this # might kill the ability to do multiple tabs in parallel, # unless it gets offloaded onto the form again. request.session['upload_dir'] = temp_dir_name if form.data['upload'] is not None: request.session['filename'] = form.data['upload'].filename # Google Docs Conversion # if we have a Google Docs ID and Access token. 
if form.data['gdocs_resource_id']: gdocs_resource_id = form.data['gdocs_resource_id'] gdocs_access_token = form.data['gdocs_access_token'] form.data['gdocs_resource_id'] = None form.data['gdocs_access_token'] = None (request.session['title'], request.session['filename']) = \ process_gdocs_resource(save_dir, \ gdocs_resource_id, \ gdocs_access_token) # HTML URL Import: elif form.data.get('url_text'): url = form.data['url_text'] form.data['url_text'] = None # Build a regex for Google Docs URLs regex = re.compile("^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/") r = regex.search(url) # Take special action for Google Docs URLs if r: gdocs_resource_id = r.groups()[0] (request.session['title'], request.session['filename']) = \ process_gdocs_resource(save_dir, "document:" + gdocs_resource_id) else: # download html: #html = urllib2.urlopen(url).read() # Simple urlopen() will fail on mediawiki websites like e.g. Wikipedia! import_opener = urllib2.build_opener() import_opener.addheaders = [('User-agent', 'Mozilla/5.0')] try: import_request = import_opener.open(url) html = import_request.read() # transformation cnxml, objects, html_title = htmlsoup_to_cnxml( html, bDownloadImages=True, base_or_source_url=url) request.session['title'] = html_title cnxml = clean_cnxml(cnxml) save_cnxml(save_dir, cnxml, objects.items()) # Keep the info we need for next uploads. Note that # this might kill the ability to do multiple tabs in # parallel, unless it gets offloaded onto the form # again. request.session['filename'] = "HTML Document" validate_cnxml(cnxml) except urllib2.URLError, e: request['errors'] = ['The URL %s could not be opened' %url,] response = { 'form': FormRenderer(form), } return render_to_response(templatePath, response, request=request) # Office, CNXML-ZIP or LaTeX-ZIP file else: # Save the original file so that we can convert, plus keep it. 
original_filename = os.path.join( save_dir, form.data['upload'].filename.replace(os.sep, '_')) saved_file = open(original_filename, 'wb') input_file = form.data['upload'].file shutil.copyfileobj(input_file, saved_file) saved_file.close() input_file.close() form.data['upload'] = None # Check if it is a ZIP file with at least index.cnxml or a LaTeX file in it try: zip_archive = zipfile.ZipFile(original_filename, 'r') is_zip_archive = ('index.cnxml' in zip_archive.namelist()) # Do we have a latex file? if not is_zip_archive: # incoming latex.zip must contain a latex.tex file, where "latex" is the base name. (latex_head, latex_tail) = os.path.split(original_filename) (latex_root, latex_ext) = os.path.splitext(latex_tail) latex_basename = latex_root latex_filename = latex_basename + '.tex' is_latex_archive = (latex_filename in zip_archive.namelist()) except zipfile.BadZipfile: is_zip_archive = False is_latex_archive = False # ZIP package from previous conversion if is_zip_archive: # Unzip into transform directory zip_archive.extractall(path=save_dir) # Rename ZIP file so that the user can download it again os.rename(original_filename, os.path.join(save_dir, 'upload.zip')) # Read CNXML with open(os.path.join(save_dir, 'index.cnxml'), 'rt') as fp: cnxml = fp.read() # Convert the CNXML to XHTML for preview html = cnxml_to_htmlpreview(cnxml) with open(os.path.join(save_dir, 'index.xhtml'), 'w') as index: index.write(html) cnxml = clean_cnxml(cnxml) validate_cnxml(cnxml) # LaTeX elif is_latex_archive: f = open(original_filename) latex_archive = f.read() # LaTeX 2 CNXML transformation cnxml, objects = latex_to_cnxml(latex_archive, original_filename) cnxml = clean_cnxml(cnxml) save_cnxml(save_dir, cnxml, objects.items()) validate_cnxml(cnxml) # OOo / MS Word Conversion else: # Convert from other office format to odt if needed odt_filename = original_filename filename, extension = os.path.splitext(original_filename) if(extension != '.odt'): odt_filename= '%s.odt' % filename 
command = '/usr/bin/soffice -headless -nologo -nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + escape_system(original_filename)[1:-1] + ',' + odt_filename + ')"' os.system(command) try: fp = open(odt_filename, 'r') fp.close() except IOError as io: raise ConversionError("%s not found" % original_filename) # Convert and save all the resulting files. tree, files, errors = transform(odt_filename) cnxml = clean_cnxml(etree.tostring(tree)) save_cnxml(save_dir, cnxml, files.items()) # now validate with jing validate_cnxml(cnxml)
# Stand-alone helper script: fetch one Google Doc, convert it to CNXML and
# write the result to valid.cnxml (used to build a test reference file).
from utils import clean_cnxml, escape_system
from test_conversion import validate_cnxml, remove_ids

url = 'https://docs.google.com/document/d/1tiZR1fhBl3ZQ_UaQ5sRDA3gSs_7LjgtTITkBAGjuTpI/edit'
#url='https://docs.google.com/document/d/1Gw9j1J-_d5YQoq6SIc3Az2hiVlwtvVcJkXfYKDR_zBM/edit'
# Extract the document id from the URL; 'document:<id>' is the resource-id
# format used by the gdocs helpers in this project.
match_doc_id = re.match(r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
rid = 'document:' + match_doc_id.group(1)
print(rid)
filename = rid[9:]  # NOTE(review): assigned but never used below
valid_filename = 'valid.cnxml'
gdoc_url = construct_url(rid[9:])  # rid[9:] strips the 'document:' prefix
print(gdoc_url)
# NOTE(review): get_gdoc() is called with the original edit URL, not the
# constructed gdoc_url printed above -- confirm which one is intended.
rid, original_title = get_gdoc(url, './')
html_filename = './' + rid[9:] + '.htm'
html_file = open(html_filename, 'r')
try:
    html = html_file.read()
    html_file.flush()  # NOTE(review): flush on a read-only handle is a no-op
finally:
    html_file.close()
# HTML -> CNXML, cleaned and validated, then saved as the reference file.
cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
cnxml = clean_cnxml(cnxml)
validate_cnxml(cnxml)
output = open(valid_filename, 'w')
output.write(cnxml)
output.close()
# Command-line entry: regenerate the known-good .cnxml reference file for a
# single test input passed as argv[1] (.odt, .doc or .tex).
filename = sys.argv[1]
name, extension = os.path.splitext(filename)
if (extension == '.odt' or extension == '.doc'):
    if (extension == '.doc'):
        # .doc is first converted to .odt with the external doc2odt tool.
        doc_folder = os.getcwd() + '/' + os.path.dirname(name)
        os.system('./converters/doc2odt -o ' + doc_folder + ' ' + os.getcwd() + '/' + filename)
        # Earlier approach drove a soffice macro directly:
        #command = '/usr/bin/soffice --headless --nologo --nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + os.getcwd()+'/'+filename + ',' + os.getcwd()+'/'+name+'.odt' + ')"'
        #os.system(command)
        # Point the rest of the pipeline at the freshly produced .odt.
        filename = name + '.odt'
    valid_filename = name + '.cnxml'
    # ODT -> CNXML, cleaned, written out as the new reference, ids stripped.
    tree, files, errors = transform(filename)
    cnxml = clean_cnxml(etree.tostring(tree))
    output = open(valid_filename, 'w')
    output.write(cnxml)
    output.close()
    remove_ids(valid_filename)
    if (extension == '.doc'):
        # Drop the temporary .odt produced from the .doc input.
        os.remove(os.getcwd() + '/' + name + '.odt')
elif (extension == '.tex'):
    valid_filename = name + '.cnxml'
    fp = open(filename, 'r')
    latex_archive = fp.read()
    fp.close()
    # LaTeX 2 CNXML transformation
    cnxml, objects = latex_to_cnxml(latex_archive, filename)
    # NOTE(review): this chunk appears truncated -- cnxml is computed for
    # the .tex case but the write-out to valid_filename is not visible
    # here; confirm against the full script.
def enhance(request):
    """Pyramid view: build a CNXML module around an imported slideshow.

    Renders the Google/SlideShare preview template; when the
    question/answer form validates, assembles a CNXML document from the
    session's base CNXML plus an introduction, an embedded SlideShare
    iframe and up to five multiple-choice exercises, uploads it to cnx.org
    via SWORD, notifies an external App Engine service, saves the CNXML
    into a fresh transform directory and redirects to the metadata step.

    NOTE(review): reconstructed from whitespace-mangled source -- the
    exact nesting of a few statements (e.g. the slideshare status check)
    should be confirmed against the original file.
    """
    check_login(request)
    session = request.session
    google_resource_id = ""
    slideshare_id = ""
    embed_google = False
    embed_slideshare = False
    not_converted = True
    show_iframe = False
    form = Form(request, schema=QuestionAnswerSchema)
    validate_form = form.validate()
    print form.all_errors()
    if session.has_key('google-resource-id'):
        google_resource_id = session['google-resource-id']
    if session.has_key('slideshare_id'):
        slideshare_id = session['slideshare_id']
        # Status "2" means SlideShare finished converting the slideshow.
        if fetch_slideshow_status(slideshare_id) == "2":
            not_converted = False
            show_iframe = True
    if google_resource_id != "":
        embed_google = True
    if slideshare_id != "":
        embed_slideshare = True
    templatePath = "templates/google_ss_preview.pt"
    if validate_form:
        introductory_paragraphs = request.POST.get('introductory_paragraphs')
        question_count = 0
        # Start the document: introduction section + embedded slideshow.
        cnxml = session["cnxml"] + """<content><section id="intro-section-title"><title id="introtitle">Introduction</title><para id="introduction-1">""" + introductory_paragraphs + """</para></section><section id="slides-embed"><title id="slide-embed-title">View the slides</title><figure id="ss-embed-figure"><media id="slideshare-embed" alt="slideshare-embed"><iframe src="http://www.slideshare.net/slideshow/embed_code/""" + slideshare_id + """" width="425" height="355" /></media></figure></section>"""
        # Up to five quiz questions, each with up to nine answer options.
        for i in range(1, 6):
            form_question = request.POST.get('question-' + str(i))
            if form_question:
                form_radio_answer = request.POST.get('radio-' + str(i))  #this give us something like 'answer-1-1'. so our solution is this
                question_count += 1
                if question_count == 1:
                    # First question opens the quiz section.
                    cnxml += """<section id="test-section"><title>Test your knowledge</title>"""
                itemlist = ""
                for j in range(1, 10):
                    try:
                        form_all_answers = request.POST.get('answer-' + str(i) + '-' + str(j))
                        if form_all_answers:
                            itemlist += "<item>" + form_all_answers + "</item>"
                    except:
                        # NOTE(review): POST.get should not raise; this
                        # bare except looks unreachable.
                        print "No element found"
                if form_radio_answer:
                    # The radio value names the POST field holding the
                    # correct answer.
                    solution = request.POST.get(form_radio_answer)
                    cnxml += """<exercise id="exercise-""" + str(i) + """"><problem id="problem-""" + str(i) + """"><para id="para-""" + str(i) + """">""" + str(form_question) + """<list id="option-list-""" + str(i) + """" list-type="enumerated" number-style="lower-alpha">""" + str(itemlist) + """</list></para></problem>"""
                else:
                    # No radio selected: fall back to the first answer and
                    # emit the problem without an option list.
                    print "ELESE CONDUITION OF radio"
                    solution = request.POST.get('answer-' + str(i) + '-1')
                    cnxml += """<exercise id="exercise-""" + str(i) + """"><problem id="problem-""" + str(i) + """"><para id="para-""" + str(i) + """">""" + str(form_question) + """</para></problem>"""
                print "FORM RADIO ANSWER", form_radio_answer
                print "SOLUTION", solution
                cnxml += """ <solution id="solution-""" + str(i) + """"> <para id="solution-para-""" + str(i) + """">""" + solution + """</para></solution></exercise>"""
                # The string below is leftover commented-out code kept as
                # an expression-statement (no-op) in the original.
                """form_solution = request.POST.get('solution-'+str(i))
                all_post_data = {"data":{"options":form_options,"solution":form_solution,"question":form_question}}
                for question in all_post_data:
                    options = all_post_data[question]['options']
                    solution = all_post_data[question]['solution']
                    asked_question = all_post_data[question]['question']
                    optionlist=""
                    for option in options:
                        optionlist+="<item>"+option+"</item>"""
                #cnxml+="""<exercise id="exercise-"""+str(j)+""""><problem id="problem-"""+str(j)+""""><para id="para-"""+str(j)+"""">"""+str(asked_question)+"""<list id="option-list-"""+str(j)+"""" list-type="enumerated" number-style="lower-alpha">"""+str(optionlist)+"""</list></para></problem>"""
                #cnxml+=""" <solution id="solution-"""+str(j)+""""> <para id="solution-para-"""+str(j)+"""">"""+solution+"""</para></solution></exercise>"""
                #j+=1
        metadata = session['metadata']
        # Close the quiz section only if at least one question was added.
        if question_count >= 1:
            cnxml += "</section></content></document>"
        else:
            cnxml += "</content></document>"
        workspaces = [(i['href'], i['title']) for i in session['login'].collections]
        metadata_entry = sword2cnx.MetaData(metadata)
        # Package the CNXML as upload.zip for the SWORD deposit.
        zipped_filepath = session['userfilepath']
        zip_archive = zipfile.ZipFile(zipped_filepath, 'w')
        zip_archive.writestr("index.cnxml", cnxml)
        zip_archive.close()
        conn = sword2cnx.Connection("http://cnx.org/sword/servicedocument",
                                    user_name=session['login'].username,
                                    user_pass=session['login'].password,
                                    always_authenticate=True,
                                    download_service_document=True)
        # Refresh the user's workspace list from the service document.
        collections = [{'title': i.title, 'href': i.href} for i in sword2cnx.get_workspaces(conn)]
        session['login'].collections = collections
        workspaces = [(i['href'], i['title']) for i in session['login'].collections]
        session['workspaces'] = workspaces
        # Deposit into the first workspace as an in-progress SimpleZip.
        with open(zipped_filepath, 'rb') as zip_file:
            deposit_receipt = conn.create(col_iri=workspaces[0][0],
                                          metadata_entry=metadata_entry,
                                          payload=zip_file,
                                          filename='upload.zip',
                                          mimetype='application/zip',
                                          packaging='http://purl.org/net/sword/package/SimpleZip',
                                          in_progress=True)
        session['dr'] = deposit_receipt
        session['deposit_receipt'] = deposit_receipt.to_xml()
        # Pull the edit IRI and the creator's email out of the receipt.
        soup = BeautifulSoup(deposit_receipt.to_xml())
        data = soup.find("link", rel="edit")
        edit_iri = data['href']
        session['edit_iri'] = edit_iri
        creator = soup.find('dcterms:creator')
        username = session['login'].username
        email = creator["oerdc:email"]
        # Notify the external App Engine tracker of this slideshow import.
        # NOTE(review): blocking network call; failures are unhandled.
        url = "http://connexions-oerpub.appspot.com/"
        post_values = {"username": username, "email": email, "slideshow_id": slideshare_id}
        data = urllib.urlencode(post_values)
        google_req = urllib2.Request(url, data)
        google_response = urllib2.urlopen(google_req)
        # Create a fresh transform directory and save the CNXML there for
        # the next (metadata) step.
        now_string = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        temp_dir_name = '%s-%s' % (request.session['login'].username, now_string)
        save_dir = os.path.join(request.registry.settings['transform_dir'], temp_dir_name)
        os.mkdir(save_dir)
        request.session['upload_dir'] = temp_dir_name
        cnxml = clean_cnxml(cnxml)
        save_cnxml(save_dir, cnxml, [])
        return HTTPFound(location=request.route_url('metadata'))
        #return HTTPFound(location=request.route_url('updatecnx'))
    # Form not (yet) valid: render the preview template.
    response = {'form': FormRenderer(form),
                "slideshare_id": slideshare_id,
                "google_resource_id": google_resource_id,
                "embed_google": embed_google,
                "embed_slideshare": embed_slideshare,
                "not_converted": not_converted,
                "show_iframe": show_iframe}
    return render_to_response(templatePath, response, request=request)
def test_doc(self): doc_files = os.listdir(test_folder_name + 'doc/') i = 0 # Find only .odt files in the testing folder for odt while (i < len(doc_files)): f = doc_files[i] filename, extension = os.path.splitext(f) if (extension != '.doc'): doc_files.remove(f) else: i = i + 1 for f in doc_files: original_filename = test_folder_name + 'doc/' + f filename, extension = os.path.splitext(original_filename) valid_filename = filename + '.cnxml' output_filename = filename + '.tmp' doc_filename = original_filename diff_filename = filename + '.diff' err_filename = filename + '.err' odt_filename = filename + '.odt' command = '/usr/bin/soffice --headless --nologo --nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + os.getcwd( ) + '/' + original_filename + ',' + os.getcwd( ) + '/' + odt_filename + ')"' os.system(command) try: open(valid_filename, 'r') except IOError as e: print('Missing valid file (' + valid_filename + ') for testing ' + original_filename) return tree, files, errors = transform(odt_filename) cnxml = clean_cnxml(etree.tostring(tree)) validate_cnxml(cnxml) output = open(output_filename, 'w') output.write(cnxml) output.close() remove_ids(output_filename) process = subprocess.Popen( ['diff', valid_filename, output_filename], shell=False, stdout=subprocess.PIPE) std_output = process.communicate() if (std_output[0] != None and len(std_output[0]) != 0): diff_output = open(diff_filename, 'w') diff_output.write(std_output[0]) diff_output.close() print( 'Differences in the testing of ' + original_filename + ', information on those differences has been placed in ' + diff_filename) elif (std_output[1] != None and len(std_output[1]) != 0): err_output = open(err_filename, 'w') err_output.write(std_output[1]) err_output.close() print( 'Error(s) occurred while attempting to test for differences in CNXML output of ' + original_filename + ', information on these errors are in ' + err_filename)
def enhance(request):
    """Pyramid view: build a CNXML module from slideshow Q&A form input.

    On a valid POST, appends an intro section, a SlideShare embed, and up
    to five multiple-choice exercises to the session's CNXML, deposits the
    zipped module via SWORD to cnx.org, notifies an external appspot
    service, saves the cleaned CNXML locally, and redirects to the
    metadata step. Otherwise re-renders the preview form.
    """
    check_login(request)
    session = request.session
    google_resource_id = ""
    slideshare_id = ""
    embed_google = False
    embed_slideshare = False
    not_converted = True
    show_iframe = False
    form = Form(request, schema=QuestionAnswerSchema)
    validate_form = form.validate()
    print form.all_errors()
    if session.has_key('google-resource-id'):
        google_resource_id = session['google-resource-id']
    if session.has_key('slideshare_id'):
        slideshare_id = session['slideshare_id']
        # Status "2" appears to mean the slideshow finished converting —
        # TODO confirm against fetch_slideshow_status's contract.
        if fetch_slideshow_status(slideshare_id) == "2":
            not_converted = False
            show_iframe = True
    if google_resource_id != "":
        embed_google = True
    if slideshare_id != "":
        embed_slideshare = True
    templatePath = "templates/google_ss_preview.pt"
    if validate_form:
        introductory_paragraphs = request.POST.get('introductory_paragraphs')
        question_count = 0
        # Start from the session's CNXML and append intro + slide embed.
        cnxml = session[
            "cnxml"] + """<content><section id="intro-section-title"><title id="introtitle">Introduction</title><para id="introduction-1">""" + introductory_paragraphs + """</para></section><section id="slides-embed"><title id="slide-embed-title">View the slides</title><figure id="ss-embed-figure"><media id="slideshare-embed" alt="slideshare-embed"><iframe src="http://www.slideshare.net/slideshow/embed_code/""" + slideshare_id + """" width="425" height="355" /></media></figure></section>"""
        # Up to five questions come in as question-1 .. question-5.
        for i in range(1, 6):
            form_question = request.POST.get('question-' + str(i))
            if form_question:
                # The radio field holds the NAME of the correct answer's
                # field, e.g. 'answer-1-1'; it is dereferenced below.
                form_radio_answer = request.POST.get('radio-' + str(i))
                question_count += 1
                if question_count == 1:
                    # Open the quiz section before the first exercise.
                    cnxml += """<section id="test-section"><title>Test your knowledge</title>"""
                itemlist = ""
                # Collect up to nine answer options for this question.
                for j in range(1, 10):
                    try:
                        form_all_answers = request.POST.get('answer-' + str(i)
                                                            + '-' + str(j))
                        if form_all_answers:
                            itemlist += "<item>" + form_all_answers + "</item>"
                    except:
                        print "No element found"
                if form_radio_answer:
                    # Correct answer text is the value of the field whose
                    # name was carried in the radio input.
                    solution = request.POST.get(form_radio_answer)
                    cnxml += """<exercise id="exercise-""" + str(
                        i) + """"><problem id="problem-""" + str(
                        i) + """"><para id="para-""" + str(i) + """">""" + str(
                        form_question) + """<list id="option-list-""" + str(
                        i) + """" list-type="enumerated" number-style="lower-alpha">""" + str(
                        itemlist) + """</list></para></problem>"""
                else:
                    # No radio selection: default to the first answer.
                    print "ELESE CONDUITION OF radio"
                    solution = request.POST.get('answer-' + str(i) + '-1')
                    cnxml += """<exercise id="exercise-""" + str(
                        i) + """"><problem id="problem-""" + str(
                        i) + """"><para id="para-""" + str(i) + """">""" + str(
                        form_question) + """</para></problem>"""
                print "FORM RADIO ANSWER", form_radio_answer
                print "SOLUTION", solution
                cnxml += """ <solution id="solution-""" + str(
                    i) + """"> <para id="solution-para-""" + str(
                    i) + """">""" + solution + """</para></solution></exercise>"""
                # Dead code below: an abandoned draft kept as a string
                # literal plus commented-out lines.
                """form_solution = request.POST.get('solution-'+str(i))
                all_post_data = {"data":{"options":form_options,"solution":form_solution,"question":form_question}}
                for question in all_post_data:
                    options = all_post_data[question]['options']
                    solution = all_post_data[question]['solution']
                    asked_question = all_post_data[question]['question']
                    optionlist=""
                    for option in options:
                        optionlist+="<item>"+option+"</item>"""
                #cnxml+="""<exercise id="exercise-"""+str(j)+""""><problem id="problem-"""+str(j)+""""><para id="para-"""+str(j)+"""">"""+str(asked_question)+"""<list id="option-list-"""+str(j)+"""" list-type="enumerated" number-style="lower-alpha">"""+str(optionlist)+"""</list></para></problem>"""
                #cnxml+=""" <solution id="solution-"""+str(j)+""""> <para id="solution-para-"""+str(j)+"""">"""+solution+"""</para></solution></exercise>"""
                #j+=1
        metadata = session['metadata']
        # Close the quiz section only if at least one exercise was added.
        if question_count >= 1:
            cnxml += "</section></content></document>"
        else:
            cnxml += "</content></document>"
        workspaces = [(i['href'], i['title'])
                      for i in session['login'].collections]
        metadata_entry = sword2cnx.MetaData(metadata)
        # Zip the finished CNXML for the SWORD deposit payload.
        zipped_filepath = session['userfilepath']
        zip_archive = zipfile.ZipFile(zipped_filepath, 'w')
        zip_archive.writestr("index.cnxml", cnxml)
        zip_archive.close()
        conn = sword2cnx.Connection("http://cnx.org/sword/servicedocument",
                                    user_name=session['login'].username,
                                    user_pass=session['login'].password,
                                    always_authenticate=True,
                                    download_service_document=True)
        # Refresh the user's workspace list from the service document.
        collections = [{
            'title': i.title,
            'href': i.href
        } for i in sword2cnx.get_workspaces(conn)]
        session['login'].collections = collections
        workspaces = [(i['href'], i['title'])
                      for i in session['login'].collections]
        session['workspaces'] = workspaces
        # Deposit into the first workspace; in_progress=True leaves the
        # module unpublished.
        with open(zipped_filepath, 'rb') as zip_file:
            deposit_receipt = conn.create(
                col_iri=workspaces[0][0],
                metadata_entry=metadata_entry,
                payload=zip_file,
                filename='upload.zip',
                mimetype='application/zip',
                packaging='http://purl.org/net/sword/package/SimpleZip',
                in_progress=True)
        session['dr'] = deposit_receipt
        session['deposit_receipt'] = deposit_receipt.to_xml()
        # Pull the edit IRI and creator e-mail out of the receipt XML.
        soup = BeautifulSoup(deposit_receipt.to_xml())
        data = soup.find("link", rel="edit")
        edit_iri = data['href']
        session['edit_iri'] = edit_iri
        creator = soup.find('dcterms:creator')
        username = session['login'].username
        email = creator["oerdc:email"]
        # Notify the external appspot service about this slideshow.
        # NOTE(review): the response is never checked.
        url = "http://connexions-oerpub.appspot.com/"
        post_values = {
            "username": username,
            "email": email,
            "slideshow_id": slideshare_id
        }
        data = urllib.urlencode(post_values)
        google_req = urllib2.Request(url, data)
        google_response = urllib2.urlopen(google_req)
        # Save the cleaned CNXML into a fresh per-user transform dir.
        now_string = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
        temp_dir_name = '%s-%s' % (request.session['login'].username,
                                   now_string)
        save_dir = os.path.join(request.registry.settings['transform_dir'],
                                temp_dir_name)
        os.mkdir(save_dir)
        request.session['upload_dir'] = temp_dir_name
        cnxml = clean_cnxml(cnxml)
        save_cnxml(save_dir, cnxml, [])
        return HTTPFound(location=request.route_url('metadata'))
        #return HTTPFound(location=request.route_url('updatecnx'))
    # Invalid/initial GET: re-render the preview template.
    response = {
        'form': FormRenderer(form),
        "slideshare_id": slideshare_id,
        "google_resource_id": google_resource_id,
        "embed_google": embed_google,
        "embed_slideshare": embed_slideshare,
        "not_converted": not_converted,
        "show_iframe": show_iframe
    }
    return render_to_response(templatePath, response, request=request)
def test_gdocs(self):
    """Regression-test the Google Docs -> CNXML transform.

    Uploads the local .doc fixtures to Google Docs, adds any document ids
    listed in ./test_files/gdocs/test_files, then for each resource id
    downloads the HTML, transforms it to CNXML, and diffs the result
    against the stored-known-good .cnxml file.
    """
    # Optional extra fixtures: a file of Google Docs URLs (may be absent).
    have_test_file = False
    try:
        fp = open('./test_files/gdocs/test_files')
        fp.close()
        have_test_file = True
    except:
        print('No gdocs test file')
    doc_files = os.listdir(test_folder_name + 'doc/')
    rids = []
    i = 0
    # Keep only the .doc files; index advances only when nothing removed.
    while (i < len(doc_files)):
        f = doc_files[i]
        filename, extension = os.path.splitext(f)
        if (extension != '.doc'):
            doc_files.remove(f)
        else:
            i = i + 1
    # Upload each .doc fixture and record the returned resource id.
    for d in doc_files:
        try:
            just_filename = os.path.basename(d)
            just_filename, extension = os.path.splitext(just_filename)
            rid = upload_doc(test_folder_name + 'doc/' + d,
                             'application/msword', just_filename)
            rids.append(rid)
        except KeyboardInterrupt:
            exit()
        except:
            print('Error uploading ' + just_filename + ' to gdocs')
    # Harvest additional resource ids from the URL list, skipping
    # '#'-comment lines.
    if (have_test_file):
        fp = open('./test_files/gdocs/test_files')
        for url in fp:
            if (url[0] == '#'):
                continue
            match_doc_id = re.match(
                r'^.*docs\.google\.com/document/d/([^/]+).*$', url)
            if match_doc_id:
                rids.append('document:' + match_doc_id.group(1))
        fp.close()
    count = 0
    for rid in rids:
        # Ids from uploaded .doc files reuse the local filename; harvested
        # ids fall back to the id itself. rid[9:] strips the 'document:'
        # prefix.
        if (count < len(doc_files)):
            filename = os.path.basename(doc_files[count])
            filename, ext = os.path.splitext(filename)
        else:
            filename = rid[9:]
        valid_filename = './test_files/gdocs/' + filename + '.cnxml'
        output_filename = './test_files/gdocs/' + filename + '.tmp'
        diff_filename = './test_files/gdocs/' + filename + '.diff'
        err_filename = './test_files/gdocs/' + filename + '.err'
        gdoc_url = construct_url(rid[9:])
        # NOTE: rid is rebound here to whatever get_gdoc returns.
        rid, original_title = get_gdoc(gdoc_url, './test_files/gdocs')
        html_filename = './test_files/gdocs/' + rid[9:] + '.htm'
        html_file = open(html_filename, 'r')
        try:
            html = html_file.read()
            html_file.flush()
        finally:
            html_file.close()
        # HTML -> CNXML transformation, then validate and strip ids so the
        # diff ignores auto-generated identifiers.
        cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
        cnxml = clean_cnxml(cnxml)
        validate_cnxml(cnxml)
        output = open(output_filename, 'w')
        output.write(cnxml)
        output.close()
        remove_ids(output_filename)
        os.remove('./test_files/gdocs/' + rid[9:] + '.htm')
        process = subprocess.Popen(
            ['diff', valid_filename, output_filename],
            shell=False,
            stdout=subprocess.PIPE)
        std_output = process.communicate()
        if (std_output[0] != None and len(std_output[0]) != 0):
            diff_output = open(diff_filename, 'w')
            diff_output.write(std_output[0])
            diff_output.close()
            print(
                'Differences in the testing of gdoc ' + filename +
                ', information on those differences has been placed in ' +
                diff_filename)
        # NOTE(review): stderr is not piped above, so std_output[1] is
        # always None and this branch is dead as written.
        elif (std_output[1] != None and len(std_output[1]) != 0):
            err_output = open(err_filename, 'w')
            err_output.write(std_output[1])
            err_output.close()
            print(
                'Error(s) occurred while attempting to test for differences in CNXML output of gdoc '
                + filename + ', information on these errors are in ' +
                err_filename)
        count = count + 1
def test_url(self):
    """Regression-test the URL/HTML -> CNXML transform.

    Each extensionless file under <test_folder_name>/url/ contains a URL
    on its first line. The page is fetched, transformed to CNXML, and
    diffed against the stored-known-good <name>.cnxml; differences go to
    <name>.diff, diff's own errors to <name>.err.
    """
    url_files = os.listdir(test_folder_name + 'url/')
    i = 0
    # Keep only the extensionless URL-list files.
    while i < len(url_files):
        f = url_files[i]
        filename, extension = os.path.splitext(f)
        if extension != '':
            url_files.remove(f)
        else:
            i = i + 1
    for f in url_files:
        input_file = open(test_folder_name + 'url/' + f, 'r')
        url = input_file.readline()
        input_file.close()
        # BUGFIX: removed the dead first assignment of output_filename to
        # the '.cnxml' path (it was immediately overwritten with '.tmp'
        # and duplicated valid_filename).
        valid_filename = test_folder_name + 'url/' + f + '.cnxml'
        output_filename = test_folder_name + 'url/' + f + '.tmp'
        diff_filename = test_folder_name + 'url/' + f + '.diff'
        err_filename = test_folder_name + 'url/' + f + '.err'
        import_opener = urllib2.build_opener()
        import_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        try:
            import_request = import_opener.open(url)
            html = import_request.read()
            # transformation
            cnxml, objects, html_title = htmlsoup_to_cnxml(
                html, bDownloadImages=True, base_or_source_url=url)
            cnxml = clean_cnxml(cnxml)
            validate_cnxml(cnxml)
            output = open(output_filename, 'w')
            output.write(cnxml)
            output.close()
            remove_ids(output_filename)
            # BUGFIX: stderr is now piped too; previously only stdout was
            # captured, so std_output[1] was always None and the
            # error-report branch below could never run.
            process = subprocess.Popen(
                ['diff', valid_filename, output_filename],
                shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            std_output = process.communicate()
            if std_output[0] is not None and len(std_output[0]) != 0:
                diff_output = open(diff_filename, 'w')
                diff_output.write(std_output[0])
                diff_output.close()
                print(
                    'Differences in the testing of ' + f +
                    ', information on those differences has been placed in ' +
                    diff_filename)
            elif std_output[1] is not None and len(std_output[1]) != 0:
                err_output = open(err_filename, 'w')
                err_output.write(std_output[1])
                err_output.close()
                print(
                    'Error(s) occurred while attempting to test for differences in CNXML output of '
                    + f + ', information on these errors are in ' +
                    err_filename)
        # `as` syntax works on both Python 2.6+ and 3; the bound exception
        # was unused, so the name is dropped.
        except urllib2.URLError:
            print('URL ' + url + ' could not be opened')
            quit()
# Regenerate the known-good .cnxml files for the gdocs tests: for every
# collected resource id, download the Google Doc HTML, transform it to
# CNXML, and OVERWRITE the stored valid file with the (id-stripped)
# result. NOTE(review): relies on `rids`, `count`, and `doc_files` being
# defined earlier — presumably by the same harvesting code test_gdocs
# uses; confirm against the surrounding scope.
for rid in rids:
    print(rid)
    # Ids from uploaded .doc files reuse the local filename; others fall
    # back to the id itself (rid[9:] strips the 'document:' prefix).
    if count < len(doc_files):
        filename = os.path.basename(doc_files[count])
        filename, ext = os.path.splitext(filename)
    else:
        filename = rid[9:]
    valid_filename = "./test_files/gdocs/" + filename + ".cnxml"
    gdoc_url = construct_url(rid[9:])
    # NOTE: rid is rebound here to whatever get_gdoc returns.
    rid, original_title = get_gdoc(gdoc_url, "./test_files/gdocs")
    html_filename = "./test_files/gdocs/" + rid[9:] + ".htm"
    html_file = open(html_filename, "r")
    try:
        html = html_file.read()
        html_file.flush()
    finally:
        html_file.close()
    # HTML -> CNXML, then strip auto-generated ids so future diffs are
    # stable.
    cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
    cnxml = clean_cnxml(cnxml)
    validate_cnxml(cnxml)
    output = open(valid_filename, "w")
    output.write(cnxml)
    output.close()
    remove_ids(valid_filename)
    count = count + 1
    os.remove("./test_files/gdocs/" + rid[9:] + ".htm")