def golive(config, shelvedb_paths=None):  # avoid a mutable default argument
    if shelvedb_paths:
        csvfilepath = None
        for shelvedb_path in shelvedb_paths:
            columns = []
            name = shelvedb_path.rpartition('/')[-1]
            for dimension in config.dimensions:
                if name == dimension.name:
                    columns = dimension.all
                    break
            shelveddb = LRUShelve(shelvedb_path, 2000, readonly=True)
            fd, csvfilepath = tempfile.mkstemp(suffix='.csv', prefix=name)
            tmpfile = open(csvfilepath, 'w')
            csvwriter = UnicodeWriter(tmpfile, delimiter='\t')
            for key, rows in shelveddb.iteritems():  # was misspelled "iterms()"
                for row in rows:
                    values = []
                    for i in range(len(columns)):
                        value = row[i]
                        if value is None:
                            value = ''
                        values.append(value)
                    csvwriter.writerow(values)
            tmpfile.close()
            os.close(fd)
            # Bulk-load the tab-separated dump into the warehouse table.
            config.UDF_pgcopy(name, columns, '\t', None, '', open(csvfilepath))
            shelveddb.close()
            os.remove(csvfilepath)
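# Hypothetical call sketch (the shelve paths and dimension names are
# illustrative, not from the original source). Each shelve whose basename
# matches a configured dimension is dumped to a temp TSV, then bulk-loaded
# through config.UDF_pgcopy:
#
#   golive(dwconfig, shelvedb_paths=['/tmp/shelves/customer', '/tmp/shelves/product'])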
def parse_wordlist(wordlistdir='.', outputpath='.', anyxml=False):
    # TODO: move this initialisation into the for loop to avoid blank files at the end. Test.
    lsummary_out = UnicodeWriter(open(outputpath + 'linguistic-metrics.csv', 'wb'),
                                 delimiter=',', quotechar='"')
    lsummary_out.writerow(["File Name", "Total Words or Phrases", "Total Unique Words or Phrases",
                           "Total Words", "Total Phrases", "Total Unique Words",
                           "Total Unique Phrases", "Types of Word"])
    donedirs = []
    allwordsphrases = []
    gridset = ''
    # Walk any directory, picking up either wordlist.xml or any xml file (use with care!).
    # Assumes xml files are organised in directories according to the gridset; this will
    # not currently work when parsing Grid user folders (it would do the stats per page).
    for r, dirs, files in os.walk(wordlistdir):
        for filename in files:
            if (anyxml and filename.endswith(".xml")) or (not anyxml and filename.endswith("wordlist.xml")):
                filepth = os.path.join(r, filename)
                gridset = os.path.normpath(filepth).split(os.sep)[-2]
                if gridset not in donedirs:
                    # Reached a new directory. (There must be a better way of doing this!)
                    donedirs.append(gridset)
                    try:
                        # Make a directory for the raw data output.
                        os.mkdir(outputpath + '/' + gridset)
                    except OSError as e:
                        if e.errno != errno.EEXIST:
                            raise
                    if len(donedirs) > 1:
                        # Write raw and summary data after finishing a directory.
                        writeOut(lsummary_out, allwordsphrases, outputpath, donedirs[-2])
                        allwordsphrases = []
                tree = etree.parse(filepth)
                allwordsphrases += tree.xpath("(//wordlist//word//wordtext)/text()")
    if donedirs:
        # Flush the final gridset; the loop above only writes when a new directory is entered.
        writeOut(lsummary_out, allwordsphrases, outputpath, donedirs[-1])
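# Hypothetical run over a tree of gridset folders (paths illustrative only):
#
#   parse_wordlist(wordlistdir='./gridsets', outputpath='./metrics/', anyxml=False)
#   # writes ./metrics/linguistic-metrics.csv plus per-gridset raw data via writeOut()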
def get_LangADict(userdir='', output_file='Lang.csv', auto_translate=False, lang_code='es'):
    '''Look for wordlist.xml and grid.xml files, extract any CDATA text,
    and plonk it into a translation file for now.'''
    userdir = os.path.normpath(userdir + '/Grids/')
    wordlist = list()
    # Walk the user directory to find the Grids directory.
    for r, d, f in os.walk(userdir):
        for files in f:
            if 'xml' in files:
                pth = r + '/' + files
                parser = etree.XMLParser(strip_cdata=False)
                tree = etree.parse(pth, parser)
                # Collect text sent to the text bar, plus wordtext, tooltip and caption.
                wordlist = _add_to_list(wordlist, tree.xpath('//commands/command[id = "type"]/parameter[@index = "1"]/text()'))
                wordlist = _add_to_list(wordlist, tree.xpath('//wordtext/text()'))
                wordlist = _add_to_list(wordlist, tree.xpath('//tooltip/text()'))
                wordlist = _add_to_list(wordlist, tree.xpath('//caption/text()'))
    # Now dump all words to one big file.
    ff = open(output_file, 'wb')
    wordFile = UnicodeWriter(ff)
    for word in wordlist:
        if auto_translate:
            wordFile.writerow([word, translate(word, lang_code)])
        else:
            wordFile.writerow([word, 'tbc'])
    return True
def write_to_csv(prefix, tweets):
    out_csv = UnicodeWriter(open('%s%s' % (prefix, FILENAME), 'wb'), delimiter=',')
    out_csv.writerow(COLUMNS)
    for tweet in tweets:
        row = []
        for column in COLUMNS:
            # Write an empty cell for missing keys so columns stay aligned
            # (the original skipped them, shifting later values left).
            row.append(tweet.get(column, ''))
        out_csv.writerow(row)
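# Minimal usage sketch. COLUMNS and FILENAME are the module-level constants the
# function relies on; the values here are illustrative, not from the source:
#
#   COLUMNS = ['id', 'text', 'created_at']
#   FILENAME = 'tweets.csv'
#   tweets = [{'id': '1', 'text': u'hello'},
#             {'id': '2', 'text': u'world', 'created_at': '2013-01-01'}]
#   write_to_csv('backup_', tweets)   # writes backup_tweets.csv: header + 2 rows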
def write_csv(info_dics, output):
    """Write information to csv."""
    with open(output, 'w+') as o:
        fw = UnicodeWriter(o, encoding="utf-8")
        header = list(info_dics[0].keys())
        fw.writerow(header)
        for d in info_dics:
            # Index by the header keys so the column order stays stable even
            # if the dicts iterate their keys in a different order.
            fw.writerow([d.get(k, '') for k in header])
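# Hypothetical usage (data illustrative; every dict should share the same keys):
#
#   rows = [{u'name': u'café', u'city': u'Lyon'}, {u'name': u'pub', u'city': u'Oslo'}]
#   write_csv(rows, 'places.csv')   # header row from the first dict, one row per dict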
def _combine_csv(self, output):
    with open(output, 'w+') as o:
        fw = UnicodeWriter(o)
        fw.writerow(table_title)
        for fname in self.files:  # renamed from `file` to avoid shadowing the builtin
            if not fname.endswith('.csv'):
                continue
            itable = self._read_restruct_csv(fname)
            for i in itable:
                fw.writerow(i)
def writeOut(lsummary_out, allwordsphrases=[], outputpath='.', gridset=''):
    # Write data out for the last folder (gridset) encountered.
    # (There must be a better way than this?)
    uWordsPhrases = uniqueSet(allwordsphrases)  # set of unique words/phrases
    uwords = []
    uphrases = []
    words = []
    phrases = []
    wordtypes = []
    total_wordsphrases = total_uwordsphrases = total_words = total_phrases = 0
    ldata_out = UnicodeWriter(open(outputpath + '/' + gridset + '/language-data.csv', 'wb'),
                              delimiter=',', quotechar='"')
    ldata_out.writerow(["WORD", "NUMBER OF WORDS", "COUNT", "TYPE"])
    # Output metrics to file.
    for item in uWordsPhrases:
        num_words = len(item.split())
        item_count = allwordsphrases.count(item)
        if num_words == 1:  # single word
            # pos_tag expects a token list; passing a bare string tags each character.
            word_type = nltk.pos_tag([item])[-1][-1]
            # Maybe convert tags into more useful words (see nltk.help.upenn_tagset)?
            ldata_out.writerow([item, str(num_words), str(item_count), word_type])
            uwords.append(item)
            wordtypes.append(word_type)
        elif num_words > 1:  # phrase
            nltk_words = nltk.word_tokenize(item)
            word_pos = nltk.pos_tag(nltk_words)  # how best to deal with phrases?
            word_types = [x[1] for x in word_pos]
            # How to output each POS to its own column?
            ldata_out.writerow([item, str(num_words), str(item_count), ", ".join(word_types)])
            uphrases.append(item)
    for item in allwordsphrases:
        num_words = len(item.split())
        if num_words == 1:
            words.append(item)
        elif num_words > 1:
            phrases.append(item)
    uword_types = countDuplicatesInList(wordtypes)
    total_wordsphrases = len(allwordsphrases)
    total_uwordsphrases = len(uWordsPhrases)
    total_uwords = len(uwords)
    total_uphrases = len(uphrases)
    total_words = len(words)
    total_phrases = len(phrases)
    # Columns: File Name, Total Words or Phrases, Total Unique Words or Phrases,
    # Total Words, Total Phrases, Total Unique Words, Total Unique Phrases, Types of Word.
    lsummary_out.writerow([gridset, str(total_wordsphrases), str(total_uwordsphrases),
                           str(total_words), str(total_phrases), str(total_uwords),
                           str(total_uphrases), ', '.join(map(str, uword_types))])
    raw_words_out = open(outputpath + '/' + gridset + '/raw-unique-words.txt', 'wb')
    raw_words_out.writelines('\n'.join(uWordsPhrases).encode('utf-8'))
    raw_words_out.close()
    raw_phrases_out = open(outputpath + '/' + gridset + '/raw-unique-phrases.txt', 'wb')
    raw_phrases_out.writelines('\n'.join(uphrases).encode('utf-8'))
    raw_phrases_out.close()
    raw_all_out = open(outputpath + '/' + gridset + '/raw-wordsphrases.txt', 'wb')
    raw_all_out.writelines('\n'.join(allwordsphrases).encode('utf-8'))
    raw_all_out.close()
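# Why pos_tag is given a one-element list above (illustrative):
#
#   nltk.pos_tag('cat')     # treats the string as a sequence of 1-character tokens
#   nltk.pos_tag(['cat'])   # tags the whole word, e.g. [('cat', 'NN')]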
def get_LangADict(userdir="", output_file="Lang.csv", auto_translate=False, lang_code="es"): """ Look for wordlist.xml files and grid.xml files extract any cdata plonking into a translate file for now.. """ userdir = os.path.normpath(userdir + "/Grids/") wordlist = list() # Get gridsetname for r, d, f in os.walk(userdir): # Parse directories to find Grids directory for files in f: if "xml" in files: pth = r + "/" + files parser = etree.XMLParser(strip_cdata=False) tree = etree.parse(pth, parser) # Parse the file wordlist = _add_to_list( wordlist, tree.xpath('//commands/command[id = "type"]/parameter[@index = "1"]/text()') ) wordlist = _add_to_list(wordlist, tree.xpath("//wordtext/text()")) wordlist = _add_to_list(wordlist, tree.xpath("//tooltip/text()")) wordlist = _add_to_list(wordlist, tree.xpath("//caption/text()")) # now dump all words to a big file.. # wordtext, tooltip, caption # wordFile = UnicodeWriter(open('Language.csv', 'wb'), delimiter=',', quotechar='"') ff = open(output_file, "wb") # ff.write(codecs.BOM_UTF8) wordFile = UnicodeWriter(ff) goog_txt = "" for word in wordlist: if auto_translate: wordFile.writerow([word, translate(word, lang_code)]) else: wordFile.writerow([word, "tbc"]) if auto_translate: goog_trs = translate(goog_txt[1:], lang_code) goog_wds = goog_trs.split("|") for n in range(0, len(wordlist)): wordFile.writerow([wordlist[n], goog_wds[n]]) return True
    # (tail of run(i, apk): resolve the app label and record it)
    try:
        label = tag[u'android:label']
    except KeyError:
        label = app_label
    if label.startswith(u'@ref/'):
        label = get_label(label[len(u'@ref/'):], pkg, apk)
    name = tag[u'android:name']
    if name.startswith('.'):
        name = pkg + name
    star = apk.split('/')[-1][:-len('.apk')]
    print('[%4d] %s %s "%s"' % (i, star, name, label))
    apps.append([star, name, label])

for apk in glob.glob('../apks/wear/*.apk'):
    i += 1
    run(i, apk)

with open('pkg_wfs_label.csv', 'wb') as f:
    writer = UnicodeWriter(f)
    for wf in apps:
        print('[write] %s' % wf)
        writer.writerow(wf)

with open('pkg_wfs_label.csv', 'rb') as f:
    reader = UnicodeReader(f)
    for row in reader:
        print('[read] %s' % row)
def parse_grids(gridxml='grid.xml', outputpath='.', userdir='.',
                excludehidden=False, outputwordlists=True,
                ignoregrids=[], ignorecells=[], blackliststyles=[],
                singlefile=False, outputcsv=False, word_length=2,
                merge_lists=False):
    '''Parse Grid.xml files recursively. Extract vocabulary and store it
    as CSV files and/or as wordlist files.'''
    # Get the gridset name: walk the directories to find the Grids directory.
    for r, d, f in os.walk(userdir):
        if "Grids" in d:
            gridsetname = os.path.split(os.path.normpath(r))[1]
    # Outputting to Grid folders or another output folder? If no output
    # directory is specified, write into the Grid directories themselves.
    if outputpath == '.':
        outinplace = True
    else:
        outputpath = outputpath + '/'
        outinplace = False
    # Outputting to a single file?
    if singlefile:
        if outputwordlists:
            file_out = open(outputpath + gridsetname + '.xml', 'wb')
            wordlist = etree.Element("wordlist")
        if outputcsv:
            vocabWriter = UnicodeWriter(open(outputpath + 'vocab.csv', 'wb'),
                                        delimiter=',', quotechar='"')
    # Walk every directory, picking up only grid.xml files.
    for r, d, f in os.walk(userdir):
        page = os.path.split(r)[1]
        if page not in ignoregrids:
            for files in f:
                if files.endswith("grid.xml"):
                    # ******** Iterate over all grid files. ********
                    pth = os.path.join(r, files)
                    existing_wordlist = False
                    if outinplace:
                        outputpath = r + '/'
                    parser = etree.XMLParser(strip_cdata=False)
                    tree = etree.parse(pth, parser)
                    # Does it have a licence key? (Bugger if it has.) If so the grid
                    # is licenced: don't try to read the pictures.
                    if tree.xpath(".//licencekey") == []:
                        readpictures = True
                    else:
                        readpictures = False
                    cells = tree.xpath(".//cell")
                    if not singlefile:
                        if outputwordlists:
                            wordlist = etree.Element("wordlist")
                        if outputcsv:
                            vocabWriter = UnicodeWriter(open(outputpath + page + '.csv', 'wb'),
                                                        delimiter=',', quotechar='"')
                    for cell in cells:
                        # ******** Iterate over all cells in a grid. ********
                        autocells = cell.xpath("(.//autocell)/text()")
                        if '1' in autocells:
                            # This cell is a wordlist cell.
                            existing_wordlist = True
                        else:
                            tt = ''.join(cell.xpath("(.//caption)/text()"))
                            style = ''.join(cell.xpath(".//stylepreset/text()"))
                            # Check the parameter/id value for 'type', i.e. text being sent to
                            # the text bar. NOT PERFECT: we should grab the text sent to the
                            # text bar rather than the caption.
                            command_id = cell.xpath(".//id/text()")
                            # Only interested in text sent to the text bar or spoken directly;
                            # jump cell text is ignored.
                            if (("type" in command_id or "speaknow" in command_id)
                                    and "jump.to" not in command_id
                                    and "jump.back" not in command_id):
                                # (Test `tt != ''` instead to include all cells with a caption.)
                                if style not in blackliststyles:  # implement a whitelist too?
                                    if tt not in ignorecells:
                                        if ''.join(cell.xpath(".//hidden/text()")) != '1':
                                            cellchildren = cell.getchildren()
                                            vocabtext = picture = ''
                                            for cellchild in cellchildren:
                                                # If the cell has a type or speak command, save its text(s).
                                                commands = cellchild.getchildren()
                                                for command in commands:
                                                    id = command.find("id")
                                                    if id is not None:
                                                        # (The original tested `id.text == "type" or "speaknow"`,
                                                        # which is always true; test both values explicitly.)
                                                        if id.text in ("type", "speaknow"):
                                                            parameters = command.findall("parameter")
                                                            for parameter in parameters:
                                                                if "1" in parameter.xpath(".//@index"):
                                                                    # Grid seems to add squiggle characters when the
                                                                    # output text contains a space; luckily Python's
                                                                    # strip() ditches them.
                                                                    vocabtext = parameter.text.strip()
                                            # If the cell has a picture (symbol), save its path.
                                            picture = ''.join(cell.xpath(".//picture/text()"))
                                            # Skip writing if there is no text, the text is shorter than
                                            # the minimum word size, or it contains no letters.
                                            if (outputwordlists and len(vocabtext) > word_length
                                                    and re.match("[A-Za-z]", vocabtext)):
                                                word = etree.SubElement(wordlist, "word")
                                                wordtext = etree.SubElement(word, "wordtext")
                                                wordtext.text = etree.CDATA(vocabtext)
                                                if readpictures and picture != '':  # ignore if no picture
                                                    picturefile = etree.SubElement(word, "picturefile")
                                                    picturefile.text = picture
                                            if outputcsv:
                                                vocabWriter.writerow([pth, cell.get('x'), cell.get('y'),
                                                                      vocabtext, picture])
                    # Merge in data from any existing wordlist!
                    if existing_wordlist and merge_lists:
                        wordlistpath = os.path.join(os.path.dirname(pth), "wordlist.xml")
                        if os.path.isfile(wordlistpath):
                            # A wordlist exists for this grid and is referenced in it, so
                            # add its data to the uber wordlist.
                            wordlistwordlist = etree.parse(wordlistpath)
                            root = wordlistwordlist.getroot()
                            for wordx in root.iterfind("word"):  # more efficient method?
                                if outputwordlists:
                                    wordlist.append(wordx)  # how to make it CDATA?
                                if outputcsv:
                                    vocabWriter.writerow([pth, "wordlist", "wordlist",
                                                          str(wordx.findtext("wordtext")),
                                                          str(wordx.findtext("picturefile"))])
                    # ******** Write out data to many files. ********
                    if not singlefile:
                        if wordlist.findall('./word/') != []:  # don't write blank wordlists
                            # Don't write a wordlist if one already exists and we are not merging.
                            if not existing_wordlist or merge_lists:
                                if outinplace:
                                    if outputwordlists:
                                        # Writing multiple files into the Grid folders.
                                        file_out = open(outputpath + 'wordlist.xml', 'wb')
                                        file_out.write('<?xml version="1.0" encoding="UTF-8"?>'
                                                       + etree.tostring(wordlist, pretty_print=True,
                                                                        encoding='utf-8'))
                                else:
                                    if outputwordlists:
                                        # Writing multiple files into the output folder: make a
                                        # folder per gridset and name the files by page.
                                        try:
                                            os.mkdir(outputpath + '/' + gridsetname)
                                        except OSError as e:
                                            if e.errno != errno.EEXIST:
                                                raise
                                        file_out = open(outputpath + '/' + gridsetname + '/' + page + '.xml', 'wb')
                                        file_out.write('<?xml version="1.0" encoding="UTF-8"?>'
                                                       + etree.tostring(wordlist, pretty_print=True,
                                                                        encoding='utf-8'))
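# Hypothetical invocation (argument values illustrative only):
#
#   parse_grids(userdir='./GridUser', outputpath='./out',
#               outputwordlists=True, outputcsv=True,
#               ignoregrids=['settings'], word_length=2, merge_lists=True)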
def bucket_download(request, id):
    # TODO: access control
    message = ""
    bucket = get_bucket(request, id)
    if request.method == 'POST':
        try:
            zipfile_name = request.POST['zipfile_name']
        except KeyError:
            zipfile_name = bucket.default_zipfile_name()
        if zipfile_name == "":
            messages.error(request, 'No zip file name selected.')
            c = RequestContext(request, {'title': 'FeedDB Explorer'})
            return render_to_response('explorer/base.html', c)
        if not zipfile_name.endswith(".zip"):
            zipfile_name += ".zip"
        download_choice = request.POST['download_choice']
        channel_choice = request.POST['channel_choice']
        quotechar_char = '"'
        delimiter_char = ','
        # Get the selected fields.
        field_selected = []
        for item in request.POST.items():
            if item[1] == "on" and item[0].startswith("chk:"):
                field_selected.append(item[0])
                message += item[0] + "\n"
        if (download_choice == "0" or download_choice == "2") and len(field_selected) == 0:
            messages.error(request, 'No fields selected.')
            c = RequestContext(request, {'title': 'FeedDB Explorer'})
            return render_to_response('explorer/base.html', c)
        meta_selected = {}
        for field in field_selected:
            parts = field.split(":")
            if not parts[1] in meta_selected:
                meta_selected[parts[1]] = []
            parameter = parts[1] + ":" + parts[2]
            meta_selected[parts[1]].append([parts[2], request.POST[parameter]])
        # Get the selected channels.
        channel_selected = []
        channel_headers = []
        for item in request.POST.items():
            if item[1] == "on" and item[0].startswith("channel:"):
                channel_selected.append(item[0])
                message += item[0] + "\n"
        if channel_choice == "1" and len(channel_selected) == 0:
            messages.error(request, 'No channels selected.')
            c = RequestContext(request, {'title': 'FeedDB Explorer'})
            return render_to_response('explorer/base.html', c)
        channel_download = []
        channel_selected.sort()
        trials_download = []
        for ch in channel_selected:
            parts = ch.split(":")
            channel_download.append([parts[1], parts[2]])
            channel_headers.append("Trial %s:Channel %s" % (parts[1], parts[2]))
            if not parts[1] in trials_download:
                trials_download.append(parts[1])
        filenames = {}
        # Create a temporary folder to store the files.
        from time import time
        tempdir = settings.EXPLORER_TEMPORARY_FOLDER + "/" + str(time()).replace('.', '')
        try:
            os.makedirs(tempdir)
        except OSError as err:
            messages.error(request, 'Failed to create folder for storing downloaded files.')
            c = RequestContext(request, {'title': 'FeedDB Explorer'})
            return render_to_response('explorer/base.html', c)
        #
        # Create meta data if the user has chosen to do so.
        #
        if download_choice == "0" or download_choice == "2":
            # Create the trials meta data file and put it into the temp zip file.
            full_filename = "%s/trials.csv" % tempdir
            filenames["trials.csv"] = full_filename
            # (doublequote takes a boolean; the original passed the truthy string 'false'.)
            metaWriter = UnicodeWriter(open(full_filename, "w"), delimiter=delimiter_char,
                                       doublequote=False, escapechar='\\',
                                       quotechar=quotechar_char, quoting=csv.QUOTE_MINIMAL)
            # Output the trial headers.
            headers = ["Trial:ID"]
            for key, value in meta_selected.items():
                if not key in ('Setup', 'EmgSetup', 'SonoSetup', 'Sensor', 'EmgSensor',
                               'SonoSensor', 'Channel', 'EmgChannel', 'SonoChannel',
                               'PressureChannel', 'ForceChannel', 'StrainChannel',
                               'KinematicsChannel', 'EventChannel'):
                    for v in value:
                        headers.append(v[1])
            metaWriter.writerow(headers)
            for trial in bucket.trials.all():
                values = [trial.id]
                objects = {}
                objects["Session"] = trial.session
                objects["Experiment"] = trial.session.experiment
                objects["Study"] = trial.session.experiment.study
                objects["Subject"] = trial.session.experiment.subject
                objects["Trial"] = trial
                for key, value in meta_selected.items():
                    if key in objects:
                        for v in value:
                            s = getattr(objects[key], v[0])
                            if hasattr(s, 'split'):
                                ss = s.split('\r\n')
                                if len(ss) > 1:
                                    s = ' '.join(ss)
                            values.append(s)
                metaWriter.writerow(values)
            # Output channels: generate the channel headers.
            headers = ["Channel:ID"]
            for key, value in meta_selected.items():
                if key in ('Setup', 'EmgSetup', 'SonoSetup', 'Sensor', 'EmgSensor',
                           'SonoSensor', 'Channel', 'EmgChannel', 'SonoChannel',
                           'PressureChannel', 'ForceChannel', 'StrainChannel',
                           'KinematicsChannel', 'EventChannel'):
                    for v in value:
                        headers.append(v[1])
            # Generate headers for the second sensor's meta data (specifically
            # for crystal2 in sono data).
            for key, value in meta_selected.items():
                if key in ('Sensor', 'SonoSensor'):
                    for v in value:
                        headers.append('Sensor 2:%s' % v[1])
            channel_types = ['strainchannel', 'forcechannel', 'pressurechannel', 'kinematicschannel']
            for trial in bucket.trials.all():
                filename = "trial_%d_channels.csv" % trial.id
                full_filename = "%s/trial_%d_channels.csv" % (tempdir, trial.id)
                filenames[filename] = full_filename
                f = open(full_filename, "w")
                metaWriter = UnicodeWriter(f, delimiter=delimiter_char, doublequote=False,
                                           escapechar='\\', quotechar=quotechar_char,
                                           quoting=csv.QUOTE_MINIMAL)
                metaWriter.writerow(headers)
                for lineup in trial.session.channellineup_set.all():
                    objects = {}
                    ch = lineup.channel
                    if ch is None:
                        values = ["deadchannel"]
                    else:
                        objects["Channel"] = lineup.channel
                        values = [ch.id]
                        objects["Setup"] = ch.setup
                        for channel_type in channel_types:
                            if hasattr(ch, channel_type):
                                objects["Sensor"] = getattr(ch, channel_type).sensor
                        if hasattr(ch.setup, 'emgsetup'):
                            objects["EmgSetup"] = ch.setup.emgsetup
                        if hasattr(ch.setup, 'sonosetup'):
                            objects["SonoSetup"] = ch.setup.sonosetup
                        if hasattr(ch, 'emgchannel'):
                            objects["EmgChannel"] = ch.emgchannel
                            objects["Sensor"] = ch.emgchannel.sensor
                            objects["EmgSensor"] = ch.emgchannel.sensor
                        if hasattr(ch, 'eventchannel'):
                            objects["EventChannel"] = ch.eventchannel
                        if hasattr(ch, 'pressurechannel'):
                            objects["PressureChannel"] = ch.pressurechannel
                        if hasattr(ch, 'strainchannel'):
                            objects["StrainChannel"] = ch.strainchannel
                        if hasattr(ch, 'forcechannel'):
                            objects["ForceChannel"] = ch.forcechannel
                        if hasattr(ch, 'kinematicschannel'):
                            objects["KinematicsChannel"] = ch.kinematicschannel
                        if hasattr(ch, 'sonochannel'):
                            objects["SonoChannel"] = ch.sonochannel
                            objects["Sensor"] = ch.sonochannel.crystal1
                            objects["SonoSensor"] = ch.sonochannel.crystal1
                        if hasattr(ch, 'emgchannel'):
                            objects["Sensor"] = ch.emgchannel.sensor
                        for key, value in meta_selected.items():
                            if key in ('Setup', 'EmgSetup', 'SonoSetup', 'Sensor', 'EmgSensor',
                                       'SonoSensor', 'Channel', 'EmgChannel', 'SonoChannel',
                                       'PressureChannel', 'ForceChannel', 'StrainChannel',
                                       'KinematicsChannel', 'EventChannel'):
                                for v in value:
                                    s = ''
                                    if key in objects and objects[key] is not None:
                                        s = getattr(objects[key], v[0])
                                        if hasattr(s, 'split'):  # check if s is a string
                                            ss = s.split('\r\n')
                                            if len(ss) > 1:
                                                s = ' '.join(ss)
                                    values.append(s)
                        # Output the second crystal's sensor information for sono channels.
                        if hasattr(ch, 'sonochannel'):
                            objects["Sensor"] = ch.sonochannel.crystal2
                            objects["SonoSensor"] = ch.sonochannel.crystal2
                            for key, value in meta_selected.items():
                                if key in ('Sensor', 'SonoSensor'):
                                    for v in value:
                                        s = ''
                                        if key in objects:
                                            s = getattr(objects[key], v[0])
                                            if hasattr(s, 'split'):
                                                ss = s.split('\r\n')
                                                if len(ss) > 1:
                                                    s = ' '.join(ss)
                                        values.append(s)
                    metaWriter.writerow(values)
                f.close()
        #
        # Put data files into the tmp zip.
        #
        data_files = {}
        if download_choice == "1" or download_choice == "2":
            if channel_choice == "0":
                # Download all trial files.
                for trial in bucket.trials.all():
                    # Check that there is a data file.
                    if trial.data_file is not None and trial.data_file != "":
                        filename = "trial_%d.dat" % trial.id
                        full_filename = "%s/%s" % (settings.MEDIA_ROOT, trial.data_file)
                        data_files[filename] = full_filename
            else:
                # Download the selected channels only.
                filename = "channels.dat"
                full_filename = "%s/channels.dat" % tempdir
                filenames[filename] = full_filename
                f = open(full_filename, "w")
                metaWriter = UnicodeWriter(f, delimiter=delimiter_char, doublequote=False,
                                           escapechar='\\', quotechar=quotechar_char,
                                           quoting=csv.QUOTE_MINIMAL)
                metaWriter.writerow(channel_headers)
                trial_readers = {}
                total_trial_number = 0
                for trial in bucket.trials.all():
                    # Check that there is a data file.
                    if (trial.data_file is not None and trial.data_file != ""
                            and str(trial.id) in trials_download):
                        full_filename = "%s/%s" % (settings.MEDIA_ROOT, trial.data_file)
                        csvfile = open(full_filename, "rU")
                        dialect = csv.Sniffer().sniff(csvfile.read(1024))
                        csvfile.seek(0)
                        reader = csv.reader(csvfile, dialect)
                        trial_readers[str(trial.id)] = {"reader": reader, "hasmore": True,
                                                        "file": csvfile}
                        total_trial_number += 1
                rows = {}
                finished_file_number = 0
                while finished_file_number < total_trial_number:
                    rows.clear()
                    for key in trial_readers:
                        try:
                            if trial_readers[key]["hasmore"]:
                                row = trial_readers[key]["reader"].next()
                                rows[key] = row
                        except StopIteration:
                            finished_file_number += 1
                            trial_readers[key]["hasmore"] = False
                            trial_readers[key]["file"].close()
                    newrow = []
                    for ch in channel_download:
                        if ch[0] in rows:
                            if int(ch[1]) > len(rows[ch[0]]):
                                messages.error(request,
                                               "Error in channel lineup positions for trial: %s" % ch[0])
                                c = RequestContext(request, {'title': 'FeedDB Explorer'})
                                return render_to_response('explorer/base.html', c)
                            newrow.append(rows[ch[0]][int(ch[1]) - 1])
                        else:
                            newrow.append('')
                    metaWriter.writerow(newrow)
                f.close()
        response = send_zipfile(request, filenames, data_files, zipfile_name)
        for file, full_file in filenames.items():
            os.remove(full_file)
        os.rmdir(tempdir)
        return response
def translate_grids(gridxml='grid.xml', outputpath='.', userdir='.',
                    ignoregrids=[], ignorecells=[], blackliststyles=[],
                    outputwordlists=True, outputcsv=False, singlefile=False):
    '''Parse Grid.xml files recursively. Look up each word and replace it
    with the foreign word.

    Note: outputwordlists, outputcsv and singlefile were referenced in the
    original body but missing from its signature; they are added here with
    assumed defaults matching parse_grids().'''
    # Get the gridset name: walk the directories to find the Grids directory.
    for r, d, f in os.walk(userdir):
        if "Grids" in d:
            gridsetname = os.path.split(os.path.normpath(r))[1]
    # Outputting to Grid folders or another output folder? If no output
    # directory is specified, write into the Grid directories themselves.
    if outputpath == '.':
        outinplace = True
    else:
        outputpath = outputpath + '/'
        outinplace = False
    # Walk every directory, picking up only grid.xml files.
    for r, d, f in os.walk(userdir):
        page = os.path.split(r)[1]
        if page not in ignoregrids:
            for files in f:
                if files.endswith("grid.xml"):
                    pth = os.path.join(r, files)
                    if outinplace:
                        outputpath = r + '/'
                    parser = etree.XMLParser(strip_cdata=False)
                    tree = etree.parse(pth, parser)
                    # Does it have a licence key? (Bugger if it has.) If so the grid
                    # is licenced: don't try to read the pictures.
                    if tree.xpath(".//licencekey") == []:
                        readpictures = True
                    else:
                        readpictures = False
                    cells = tree.xpath(".//cell")
                    if not singlefile:
                        if outputwordlists:
                            wordlist = etree.Element("wordlist")
                        if outputcsv:
                            vocabWriter = UnicodeWriter(open(outputpath + page + '.csv', 'wb'),
                                                        delimiter=',', quotechar='"')
                    # Merge in data from any existing wordlist!
                    wordlistpath = os.path.join(os.path.dirname(pth), "wordlist.xml")
                    if os.path.isfile(wordlistpath):
                        # A wordlist exists for this grid, so add its data to the
                        # uber wordlist.
                        wordlistwordlist = etree.parse(wordlistpath)
                        root = wordlistwordlist.getroot()
                        for wordx in root.iterfind("word"):  # more efficient method?
                            if outputwordlists:
                                wordlist.append(wordx)  # how to make it CDATA?
                            if outputcsv:
                                vocabWriter.writerow([pth, "wordlist", "wordlist",
                                                      str(wordx.findtext("wordtext")),
                                                      str(wordx.findtext("picturefile"))])
                    for cell in cells:
                        tt = ''.join(cell.xpath("(.//caption)/text()"))
                        style = ''.join(cell.xpath(".//stylepreset/text()"))
                        # Check the parameter/id value for 'type', i.e. text being sent to
                        # the text bar. NOT PERFECT: we should grab the text sent to the
                        # text bar rather than the caption.
                        command_id = cell.xpath(".//id/text()")
                        # Only interested in text sent to the text bar or spoken directly.
                        if "type" in command_id or "speaknow" in command_id:
                            # (Test `tt != ''` instead to include all cells with a caption.)
                            if style not in blackliststyles:  # implement a whitelist too?
                                if tt not in ignorecells:
                                    if ''.join(cell.xpath(".//hidden/text()")) != '1':
                                        if outputwordlists:
                                            word = etree.SubElement(wordlist, "word")
                                        cellchildren = cell.getchildren()
                                        vocabtext = picture = ''
                                        for cellchild in cellchildren:
                                            # If the cell has a type or speak command, save its text(s).
                                            commands = cellchild.getchildren()
                                            for command in commands:
                                                id = command.find("id")
                                                if id is not None:
                                                    # (The original tested `id.text == "type" or "speaknow"`,
                                                    # which is always true; test both values explicitly.)
                                                    if id.text in ("type", "speaknow"):
                                                        parameters = command.findall("parameter")
                                                        for parameter in parameters:
                                                            if "1" in parameter.xpath(".//@index"):
                                                                # Grid adds squiggle characters when the output
                                                                # text contains a space; strip() ditches them.
                                                                vocabtext = parameter.text.strip()
                                                                if outputwordlists:
                                                                    wordtext = etree.SubElement(word, "wordtext")
                                                                    wordtext.text = etree.CDATA(vocabtext)
                                        # If the cell has a picture (symbol), save its path.
                                        # Potential for blank words if a cell has a symbol but no
                                        # text. What to do about this?
                                        picture = ''.join(cell.xpath(".//picture/text()"))
                                        if readpictures and picture != '':
                                            if outputwordlists:
                                                picturefile = etree.SubElement(word, "picturefile")
                                                picturefile.text = picture
                                        if outputcsv:
                                            vocabWriter.writerow([pth, cell.get('x'), cell.get('y'),
                                                                  vocabtext, picture])
                    if not singlefile:
                        if outinplace:
                            if outputwordlists:
                                # Writing multiple files into the Grid folders.
                                file_out = open(outputpath + 'wordlist.xml', 'wb')
                                file_out.write('<?xml version="1.0" encoding="UTF-8"?>'
                                               + etree.tostring(wordlist, pretty_print=True,
                                                                encoding='utf-8'))
                        else:
                            if outputwordlists:
                                # Writing multiple files into the output folder: make a folder
                                # per gridset and name the files by page.
                                try:
                                    os.mkdir(outputpath + '/' + gridsetname)
                                except OSError as e:
                                    if e.errno != errno.EEXIST:
                                        raise
                                file_out = open(outputpath + '/' + gridsetname + '/' + page + '.xml', 'wb')
                                file_out.write('<?xml version="1.0" encoding="UTF-8"?>'
                                               + etree.tostring(wordlist, pretty_print=True,
                                                                encoding='utf-8'))
            # (fragment: continues inside an enclosing loop and try block)
            title_words = set([stemmer.stemWord(word.lower())
                               for word in title.split() if word not in stopwords])
            if name_words == title_words:
                wiki_match, confidence = wikipedia_match(ann)
        except Exception as e:
            import pdb
            pdb.set_trace()
        if wiki_match:
            outfile_name = new_annotations
            if close_match:
                close_match_obj = close_match.split('/')[-1]
                wiki_match_obj = wiki_match.split('/')[-1]
                if close_match_obj != wiki_match_obj:
                    outfile_name = different_annotations
                else:
                    print name, 'already matched with:', close_match
                    continue
            with open(outfile_name, 'a+') as outfile:
                writer = UnicodeWriter(outfile)
                writer.writerow([subject_url, name, wiki_match, unicode(confidence)])