def golive(config, shelvedb_paths=None):  # avoid a mutable default argument
    if shelvedb_paths:
        csvfilepath = None
        for shelvedb_path in shelvedb_paths:
            columns = []
            name = shelvedb_path.rpartition('/')[-1]
            for dimension in config.dimensions:
                if name == dimension.name:
                    columns = dimension.all
                    break
            shelveddb = LRUShelve(shelvedb_path, 2000, readonly=True)
            fd, csvfilepath = tempfile.mkstemp(suffix='.csv', prefix=name)
            tmpfile = open(csvfilepath, 'w')
            csvwriter = UnicodeWriter(tmpfile, delimiter='\t')
            for key, rows in shelveddb.iteritems():  # was misspelled "iterms()"
                for row in rows:
                    values = []
                    for i in range(len(columns)):
                        value = row[i]
                        if value is None:
                            value = ''
                        values.append(value)
                    csvwriter.writerow(values)
            tmpfile.close()
            os.close(fd)
            # Bulk-load the tab-separated dump into the warehouse table.
            config.UDF_pgcopy(name, columns, '\t', None, '', open(csvfilepath))
            shelveddb.close()
            os.remove(csvfilepath)
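# Hypothetical call sketch (the shelve paths and dimension names are
# illustrative, not from the original source). Each shelve whose basename
# matches a configured dimension is dumped to a temp TSV, then bulk-loaded
# through config.UDF_pgcopy:
#
#   golive(dwconfig, shelvedb_paths=['/tmp/shelves/customer', '/tmp/shelves/product'])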
def parse_wordlist(wordlistdir='.', outputpath='.', anyxml=False):
    # TODO: move this initialisation into the for loop to avoid blank files at the end. Test.
    lsummary_out = UnicodeWriter(open(outputpath + 'linguistic-metrics.csv', 'wb'),
                                 delimiter=',', quotechar='"')
    lsummary_out.writerow(["File Name", "Total Words or Phrases", "Total Unique Words or Phrases",
                           "Total Words", "Total Phrases", "Total Unique Words",
                           "Total Unique Phrases", "Types of Word"])
    donedirs = []
    allwordsphrases = []
    gridset = ''
    # Walk any directory, picking up either wordlist.xml or any xml file (use with care!).
    # Assumes xml files are organised in directories according to the gridset; this will
    # not currently work when parsing Grid user folders (it would do the stats per page).
    for r, dirs, files in os.walk(wordlistdir):
        for filename in files:
            if (anyxml and filename.endswith(".xml")) or (not anyxml and filename.endswith("wordlist.xml")):
                filepth = os.path.join(r, filename)
                gridset = os.path.normpath(filepth).split(os.sep)[-2]
                if gridset not in donedirs:
                    # Reached a new directory. (There must be a better way of doing this!)
                    donedirs.append(gridset)
                    try:
                        # Make a directory for the raw data output.
                        os.mkdir(outputpath + '/' + gridset)
                    except OSError as e:
                        if e.errno != errno.EEXIST:
                            raise
                    if len(donedirs) > 1:
                        # Write raw and summary data after finishing a directory.
                        writeOut(lsummary_out, allwordsphrases, outputpath, donedirs[-2])
                        allwordsphrases = []
                tree = etree.parse(filepth)
                allwordsphrases += tree.xpath("(//wordlist//word//wordtext)/text()")
    if donedirs:
        # Flush the final gridset; the loop above only writes when a new directory is entered.
        writeOut(lsummary_out, allwordsphrases, outputpath, donedirs[-1])
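# Hypothetical run over a tree of gridset folders (paths illustrative only):
#
#   parse_wordlist(wordlistdir='./gridsets', outputpath='./metrics/', anyxml=False)
#   # writes ./metrics/linguistic-metrics.csv plus per-gridset raw data via writeOut()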
def get_LangADict(userdir='', output_file='Lang.csv', auto_translate=False, lang_code='es'):
    '''Look for wordlist.xml and grid.xml files, extract any CDATA text,
    and plonk it into a translation file for now.'''
    userdir = os.path.normpath(userdir + '/Grids/')
    wordlist = list()
    # Walk the user directory to find the Grids directory.
    for r, d, f in os.walk(userdir):
        for files in f:
            if 'xml' in files:
                pth = r + '/' + files
                parser = etree.XMLParser(strip_cdata=False)
                tree = etree.parse(pth, parser)
                # Collect text sent to the text bar, plus wordtext, tooltip and caption.
                wordlist = _add_to_list(wordlist, tree.xpath('//commands/command[id = "type"]/parameter[@index = "1"]/text()'))
                wordlist = _add_to_list(wordlist, tree.xpath('//wordtext/text()'))
                wordlist = _add_to_list(wordlist, tree.xpath('//tooltip/text()'))
                wordlist = _add_to_list(wordlist, tree.xpath('//caption/text()'))
    # Now dump all words to one big file.
    ff = open(output_file, 'wb')
    wordFile = UnicodeWriter(ff)
    for word in wordlist:
        if auto_translate:
            wordFile.writerow([word, translate(word, lang_code)])
        else:
            wordFile.writerow([word, 'tbc'])
    return True
def write_to_csv(prefix, tweets):
    out_csv = UnicodeWriter(open('%s%s' % (prefix, FILENAME), 'wb'), delimiter=',')
    out_csv.writerow(COLUMNS)
    for tweet in tweets:
        row = []
        for column in COLUMNS:
            # Write an empty cell for missing keys so columns stay aligned
            # (the original skipped them, shifting later values left).
            row.append(tweet.get(column, ''))
        out_csv.writerow(row)
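# Minimal usage sketch. COLUMNS and FILENAME are the module-level constants the
# function relies on; the values here are illustrative, not from the source:
#
#   COLUMNS = ['id', 'text', 'created_at']
#   FILENAME = 'tweets.csv'
#   tweets = [{'id': '1', 'text': u'hello'},
#             {'id': '2', 'text': u'world', 'created_at': '2013-01-01'}]
#   write_to_csv('backup_', tweets)   # writes backup_tweets.csv: header + 2 rows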
def write_csv(info_dics, output):
    """Write information to csv."""
    with open(output, 'w+') as o:
        fw = UnicodeWriter(o, encoding="utf-8")
        header = list(info_dics[0].keys())
        fw.writerow(header)
        for d in info_dics:
            # Index by the header keys so the column order stays stable even
            # if the dicts iterate their keys in a different order.
            fw.writerow([d.get(k, '') for k in header])
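# Hypothetical usage (data illustrative; every dict should share the same keys):
#
#   rows = [{u'name': u'café', u'city': u'Lyon'}, {u'name': u'pub', u'city': u'Oslo'}]
#   write_csv(rows, 'places.csv')   # header row from the first dict, one row per dict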
def _combine_csv(self, output):
    with open(output, 'w+') as o:
        fw = UnicodeWriter(o)
        fw.writerow(table_title)
        for fname in self.files:  # renamed from `file` to avoid shadowing the builtin
            if not fname.endswith('.csv'):
                continue
            itable = self._read_restruct_csv(fname)
            for i in itable:
                fw.writerow(i)
def writeOut(lsummary_out, allwordsphrases=[], outputpath='.', gridset=''):
    # Write data out for the last folder (gridset) encountered.
    # (There must be a better way than this?)
    uWordsPhrases = uniqueSet(allwordsphrases)  # set of unique words/phrases
    uwords = []
    uphrases = []
    words = []
    phrases = []
    wordtypes = []
    total_wordsphrases = total_uwordsphrases = total_words = total_phrases = 0
    ldata_out = UnicodeWriter(open(outputpath + '/' + gridset + '/language-data.csv', 'wb'),
                              delimiter=',', quotechar='"')
    ldata_out.writerow(["WORD", "NUMBER OF WORDS", "COUNT", "TYPE"])
    # Output metrics to file.
    for item in uWordsPhrases:
        num_words = len(item.split())
        item_count = allwordsphrases.count(item)
        if num_words == 1:  # single word
            # pos_tag expects a token list; passing a bare string tags each character.
            word_type = nltk.pos_tag([item])[-1][-1]
            # Maybe convert tags into more useful words (see nltk.help.upenn_tagset)?
            ldata_out.writerow([item, str(num_words), str(item_count), word_type])
            uwords.append(item)
            wordtypes.append(word_type)
        elif num_words > 1:  # phrase
            nltk_words = nltk.word_tokenize(item)
            word_pos = nltk.pos_tag(nltk_words)  # how best to deal with phrases?
            word_types = [x[1] for x in word_pos]
            # How to output each POS to its own column?
            ldata_out.writerow([item, str(num_words), str(item_count), ", ".join(word_types)])
            uphrases.append(item)
    for item in allwordsphrases:
        num_words = len(item.split())
        if num_words == 1:
            words.append(item)
        elif num_words > 1:
            phrases.append(item)
    uword_types = countDuplicatesInList(wordtypes)
    total_wordsphrases = len(allwordsphrases)
    total_uwordsphrases = len(uWordsPhrases)
    total_uwords = len(uwords)
    total_uphrases = len(uphrases)
    total_words = len(words)
    total_phrases = len(phrases)
    # Columns: File Name, Total Words or Phrases, Total Unique Words or Phrases,
    # Total Words, Total Phrases, Total Unique Words, Total Unique Phrases, Types of Word.
    lsummary_out.writerow([gridset, str(total_wordsphrases), str(total_uwordsphrases),
                           str(total_words), str(total_phrases), str(total_uwords),
                           str(total_uphrases), ', '.join(map(str, uword_types))])
    raw_words_out = open(outputpath + '/' + gridset + '/raw-unique-words.txt', 'wb')
    raw_words_out.writelines('\n'.join(uWordsPhrases).encode('utf-8'))
    raw_words_out.close()
    raw_phrases_out = open(outputpath + '/' + gridset + '/raw-unique-phrases.txt', 'wb')
    raw_phrases_out.writelines('\n'.join(uphrases).encode('utf-8'))
    raw_phrases_out.close()
    raw_all_out = open(outputpath + '/' + gridset + '/raw-wordsphrases.txt', 'wb')
    raw_all_out.writelines('\n'.join(allwordsphrases).encode('utf-8'))
    raw_all_out.close()
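# Why pos_tag is given a one-element list above (illustrative):
#
#   nltk.pos_tag('cat')     # treats the string as a sequence of 1-character tokens
#   nltk.pos_tag(['cat'])   # tags the whole word, e.g. [('cat', 'NN')]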
def get_LangADict(userdir="", output_file="Lang.csv", auto_translate=False, lang_code="es"): """ Look for wordlist.xml files and grid.xml files extract any cdata plonking into a translate file for now.. """ userdir = os.path.normpath(userdir + "/Grids/") wordlist = list() # Get gridsetname for r, d, f in os.walk(userdir): # Parse directories to find Grids directory for files in f: if "xml" in files: pth = r + "/" + files parser = etree.XMLParser(strip_cdata=False) tree = etree.parse(pth, parser) # Parse the file wordlist = _add_to_list( wordlist, tree.xpath('//commands/command[id = "type"]/parameter[@index = "1"]/text()') ) wordlist = _add_to_list(wordlist, tree.xpath("//wordtext/text()")) wordlist = _add_to_list(wordlist, tree.xpath("//tooltip/text()")) wordlist = _add_to_list(wordlist, tree.xpath("//caption/text()")) # now dump all words to a big file.. # wordtext, tooltip, caption # wordFile = UnicodeWriter(open('Language.csv', 'wb'), delimiter=',', quotechar='"') ff = open(output_file, "wb") # ff.write(codecs.BOM_UTF8) wordFile = UnicodeWriter(ff) goog_txt = "" for word in wordlist: if auto_translate: wordFile.writerow([word, translate(word, lang_code)]) else: wordFile.writerow([word, "tbc"]) if auto_translate: goog_trs = translate(goog_txt[1:], lang_code) goog_wds = goog_trs.split("|") for n in range(0, len(wordlist)): wordFile.writerow([wordlist[n], goog_wds[n]]) return True
    # (tail of run(i, apk): resolve the app label and record it)
    try:
        label = tag[u'android:label']
    except KeyError:
        label = app_label
    if label.startswith(u'@ref/'):
        label = get_label(label[len(u'@ref/'):], pkg, apk)
    name = tag[u'android:name']
    if name.startswith('.'):
        name = pkg + name
    star = apk.split('/')[-1][:-len('.apk')]
    print('[%4d] %s %s "%s"' % (i, star, name, label))
    apps.append([star, name, label])

for apk in glob.glob('../apks/wear/*.apk'):
    i += 1
    run(i, apk)

with open('pkg_wfs_label.csv', 'wb') as f:
    writer = UnicodeWriter(f)
    for wf in apps:
        print('[write] %s' % wf)
        writer.writerow(wf)

with open('pkg_wfs_label.csv', 'rb') as f:
    reader = UnicodeReader(f)
    for row in reader:
        print('[read] %s' % row)
def parse_grids(gridxml='grid.xml', outputpath='.', userdir='.',
                excludehidden=False, outputwordlists=True,
                ignoregrids=[], ignorecells=[], blackliststyles=[],
                singlefile=False, outputcsv=False, word_length=2,
                merge_lists=False):
    '''Parse Grid.xml files recursively. Extract vocabulary and store it
    as CSV files and/or as wordlist files.'''
    # Get the gridset name: walk the directories to find the Grids directory.
    for r, d, f in os.walk(userdir):
        if "Grids" in d:
            gridsetname = os.path.split(os.path.normpath(r))[1]
    # Outputting to Grid folders or another output folder? If no output
    # directory is specified, write into the Grid directories themselves.
    if outputpath == '.':
        outinplace = True
    else:
        outputpath = outputpath + '/'
        outinplace = False
    # Outputting to a single file?
    if singlefile:
        if outputwordlists:
            file_out = open(outputpath + gridsetname + '.xml', 'wb')
            wordlist = etree.Element("wordlist")
        if outputcsv:
            vocabWriter = UnicodeWriter(open(outputpath + 'vocab.csv', 'wb'),
                                        delimiter=',', quotechar='"')
    # Walk every directory, picking up only grid.xml files.
    for r, d, f in os.walk(userdir):
        page = os.path.split(r)[1]
        if page not in ignoregrids:
            for files in f:
                if files.endswith("grid.xml"):
                    # ******** Iterate over all grid files. ********
                    pth = os.path.join(r, files)
                    existing_wordlist = False
                    if outinplace:
                        outputpath = r + '/'
                    parser = etree.XMLParser(strip_cdata=False)
                    tree = etree.parse(pth, parser)
                    # Does it have a licence key? (Bugger if it has.) If so the grid
                    # is licenced: don't try to read the pictures.
                    if tree.xpath(".//licencekey") == []:
                        readpictures = True
                    else:
                        readpictures = False
                    cells = tree.xpath(".//cell")
                    if not singlefile:
                        if outputwordlists:
                            wordlist = etree.Element("wordlist")
                        if outputcsv:
                            vocabWriter = UnicodeWriter(open(outputpath + page + '.csv', 'wb'),
                                                        delimiter=',', quotechar='"')
                    for cell in cells:
                        # ******** Iterate over all cells in a grid. ********
                        autocells = cell.xpath("(.//autocell)/text()")
                        if '1' in autocells:
                            # This cell is a wordlist cell.
                            existing_wordlist = True
                        else:
                            tt = ''.join(cell.xpath("(.//caption)/text()"))
                            style = ''.join(cell.xpath(".//stylepreset/text()"))
                            # Check the parameter/id value for 'type', i.e. text being sent to
                            # the text bar. NOT PERFECT: we should grab the text sent to the
                            # text bar rather than the caption.
                            command_id = cell.xpath(".//id/text()")
                            # Only interested in text sent to the text bar or spoken directly;
                            # jump cell text is ignored.
                            if (("type" in command_id or "speaknow" in command_id)
                                    and "jump.to" not in command_id
                                    and "jump.back" not in command_id):
                                # (Test `tt != ''` instead to include all cells with a caption.)
                                if style not in blackliststyles:  # implement a whitelist too?
                                    if tt not in ignorecells:
                                        if ''.join(cell.xpath(".//hidden/text()")) != '1':
                                            cellchildren = cell.getchildren()
                                            vocabtext = picture = ''
                                            for cellchild in cellchildren:
                                                # If the cell has a type or speak command, save its text(s).
                                                commands = cellchild.getchildren()
                                                for command in commands:
                                                    id = command.find("id")
                                                    if id is not None:
                                                        # (The original tested `id.text == "type" or "speaknow"`,
                                                        # which is always true; test both values explicitly.)
                                                        if id.text in ("type", "speaknow"):
                                                            parameters = command.findall("parameter")
                                                            for parameter in parameters:
                                                                if "1" in parameter.xpath(".//@index"):
                                                                    # Grid seems to add squiggle characters when the
                                                                    # output text contains a space; luckily Python's
                                                                    # strip() ditches them.
                                                                    vocabtext = parameter.text.strip()
                                            # If the cell has a picture (symbol), save its path.
                                            picture = ''.join(cell.xpath(".//picture/text()"))
                                            # Skip writing if there is no text, the text is shorter than
                                            # the minimum word size, or it contains no letters.
                                            if (outputwordlists and len(vocabtext) > word_length
                                                    and re.match("[A-Za-z]", vocabtext)):
                                                word = etree.SubElement(wordlist, "word")
                                                wordtext = etree.SubElement(word, "wordtext")
                                                wordtext.text = etree.CDATA(vocabtext)
                                                if readpictures and picture != '':  # ignore if no picture
                                                    picturefile = etree.SubElement(word, "picturefile")
                                                    picturefile.text = picture
                                            if outputcsv:
                                                vocabWriter.writerow([pth, cell.get('x'), cell.get('y'),
                                                                      vocabtext, picture])
                    # Merge in data from any existing wordlist!
                    if existing_wordlist and merge_lists:
                        wordlistpath = os.path.join(os.path.dirname(pth), "wordlist.xml")
                        if os.path.isfile(wordlistpath):
                            # A wordlist exists for this grid and is referenced in it, so
                            # add its data to the uber wordlist.
                            wordlistwordlist = etree.parse(wordlistpath)
                            root = wordlistwordlist.getroot()
                            for wordx in root.iterfind("word"):  # more efficient method?
                                if outputwordlists:
                                    wordlist.append(wordx)  # how to make it CDATA?
                                if outputcsv:
                                    vocabWriter.writerow([pth, "wordlist", "wordlist",
                                                          str(wordx.findtext("wordtext")),
                                                          str(wordx.findtext("picturefile"))])
                    # ******** Write out data to many files. ********
                    if not singlefile:
                        if wordlist.findall('./word/') != []:  # don't write blank wordlists
                            # Don't write a wordlist if one already exists and we are not merging.
                            if not existing_wordlist or merge_lists:
                                if outinplace:
                                    if outputwordlists:
                                        # Writing multiple files into the Grid folders.
                                        file_out = open(outputpath + 'wordlist.xml', 'wb')
                                        file_out.write('<?xml version="1.0" encoding="UTF-8"?>'
                                                       + etree.tostring(wordlist, pretty_print=True,
                                                                        encoding='utf-8'))
                                else:
                                    if outputwordlists:
                                        # Writing multiple files into the output folder: make a
                                        # folder per gridset and name the files by page.
                                        try:
                                            os.mkdir(outputpath + '/' + gridsetname)
                                        except OSError as e:
                                            if e.errno != errno.EEXIST:
                                                raise
                                        file_out = open(outputpath + '/' + gridsetname + '/' + page + '.xml', 'wb')
                                        file_out.write('<?xml version="1.0" encoding="UTF-8"?>'
                                                       + etree.tostring(wordlist, pretty_print=True,
                                                                        encoding='utf-8'))
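# Hypothetical invocation (argument values illustrative only):
#
#   parse_grids(userdir='./GridUser', outputpath='./out',
#               outputwordlists=True, outputcsv=True,
#               ignoregrids=['settings'], word_length=2, merge_lists=True)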
def bucket_download(request, id):
    # TODO: access control
    message = ""
    bucket = get_bucket(request, id)
    if request.method == 'POST':
        try:
            zipfile_name = request.POST['zipfile_name']
        except KeyError:
            zipfile_name = bucket.default_zipfile_name()
        if zipfile_name == "":
            messages.error(request, 'No zip file name selected.')
            c = RequestContext(request, {'title': 'FeedDB Explorer'})
            return render_to_response('explorer/base.html', c)
        if not zipfile_name.endswith(".zip"):
            zipfile_name += ".zip"
        download_choice = request.POST['download_choice']
        channel_choice = request.POST['channel_choice']
        quotechar_char = '"'
        delimiter_char = ','
        # Get the selected fields.
        field_selected = []
        for item in request.POST.items():
            if item[1] == "on" and item[0].startswith("chk:"):
                field_selected.append(item[0])
                message += item[0] + "\n"
        if (download_choice == "0" or download_choice == "2") and len(field_selected) == 0:
            messages.error(request, 'No fields selected.')
            c = RequestContext(request, {'title': 'FeedDB Explorer'})
            return render_to_response('explorer/base.html', c)
        meta_selected = {}
        for field in field_selected:
            parts = field.split(":")
            if not parts[1] in meta_selected:
                meta_selected[parts[1]] = []
            parameter = parts[1] + ":" + parts[2]
            meta_selected[parts[1]].append([parts[2], request.POST[parameter]])
        # Get the selected channels.
        channel_selected = []
        channel_headers = []
        for item in request.POST.items():
            if item[1] == "on" and item[0].startswith("channel:"):
                channel_selected.append(item[0])
                message += item[0] + "\n"
        if channel_choice == "1" and len(channel_selected) == 0:
            messages.error(request, 'No channels selected.')
            c = RequestContext(request, {'title': 'FeedDB Explorer'})
            return render_to_response('explorer/base.html', c)
        channel_download = []
        channel_selected.sort()
        trials_download = []
        for ch in channel_selected:
            parts = ch.split(":")
            channel_download.append([parts[1], parts[2]])
            channel_headers.append("Trial %s:Channel %s" % (parts[1], parts[2]))
            if not parts[1] in trials_download:
                trials_download.append(parts[1])
        filenames = {}
        # Create a temporary folder to store the files.
        from time import time
        tempdir = settings.EXPLORER_TEMPORARY_FOLDER + "/" + str(time()).replace('.', '')
        try:
            os.makedirs(tempdir)
        except OSError as err:
            messages.error(request, 'Failed to create folder for storing downloaded files.')
            c = RequestContext(request, {'title': 'FeedDB Explorer'})
            return render_to_response('explorer/base.html', c)
        #
        # Create meta data if the user has chosen to do so.
        #
        if download_choice == "0" or download_choice == "2":
            # Create the trials meta data file and put it into the temp zip file.
            full_filename = "%s/trials.csv" % tempdir
            filenames["trials.csv"] = full_filename
            # (doublequote takes a boolean; the original passed the truthy string 'false'.)
            metaWriter = UnicodeWriter(open(full_filename, "w"), delimiter=delimiter_char,
                                       doublequote=False, escapechar='\\',
                                       quotechar=quotechar_char, quoting=csv.QUOTE_MINIMAL)
            # Output the trial headers.
            headers = ["Trial:ID"]
            for key, value in meta_selected.items():
                if not key in ('Setup', 'EmgSetup', 'SonoSetup', 'Sensor', 'EmgSensor',
                               'SonoSensor', 'Channel', 'EmgChannel', 'SonoChannel',
                               'PressureChannel', 'ForceChannel', 'StrainChannel',
                               'KinematicsChannel', 'EventChannel'):
                    for v in value:
                        headers.append(v[1])
            metaWriter.writerow(headers)
            for trial in bucket.trials.all():
                values = [trial.id]
                objects = {}
                objects["Session"] = trial.session
                objects["Experiment"] = trial.session.experiment
                objects["Study"] = trial.session.experiment.study
                objects["Subject"] = trial.session.experiment.subject
                objects["Trial"] = trial
                for key, value in meta_selected.items():
                    if key in objects:
                        for v in value:
                            s = getattr(objects[key], v[0])
                            if hasattr(s, 'split'):
                                ss = s.split('\r\n')
                                if len(ss) > 1:
                                    s = ' '.join(ss)
                            values.append(s)
                metaWriter.writerow(values)
            # Output channels: generate the channel headers.
            headers = ["Channel:ID"]
            for key, value in meta_selected.items():
                if key in ('Setup', 'EmgSetup', 'SonoSetup', 'Sensor', 'EmgSensor',
                           'SonoSensor', 'Channel', 'EmgChannel', 'SonoChannel',
                           'PressureChannel', 'ForceChannel', 'StrainChannel',
                           'KinematicsChannel', 'EventChannel'):
                    for v in value:
                        headers.append(v[1])
            # Generate headers for the second sensor's meta data (specifically
            # for crystal2 in sono data).
            for key, value in meta_selected.items():
                if key in ('Sensor', 'SonoSensor'):
                    for v in value:
                        headers.append('Sensor 2:%s' % v[1])
            channel_types = ['strainchannel', 'forcechannel', 'pressurechannel', 'kinematicschannel']
            for trial in bucket.trials.all():
                filename = "trial_%d_channels.csv" % trial.id
                full_filename = "%s/trial_%d_channels.csv" % (tempdir, trial.id)
                filenames[filename] = full_filename
                f = open(full_filename, "w")
                metaWriter = UnicodeWriter(f, delimiter=delimiter_char, doublequote=False,
                                           escapechar='\\', quotechar=quotechar_char,
                                           quoting=csv.QUOTE_MINIMAL)
                metaWriter.writerow(headers)
                for lineup in trial.session.channellineup_set.all():
                    objects = {}
                    ch = lineup.channel
                    if ch is None:
                        values = ["deadchannel"]
                    else:
                        objects["Channel"] = lineup.channel
                        values = [ch.id]
                        objects["Setup"] = ch.setup
                        for channel_type in channel_types:
                            if hasattr(ch, channel_type):
                                objects["Sensor"] = getattr(ch, channel_type).sensor
                        if hasattr(ch.setup, 'emgsetup'):
                            objects["EmgSetup"] = ch.setup.emgsetup
                        if hasattr(ch.setup, 'sonosetup'):
                            objects["SonoSetup"] = ch.setup.sonosetup
                        if hasattr(ch, 'emgchannel'):
                            objects["EmgChannel"] = ch.emgchannel
                            objects["Sensor"] = ch.emgchannel.sensor
                            objects["EmgSensor"] = ch.emgchannel.sensor
                        if hasattr(ch, 'eventchannel'):
                            objects["EventChannel"] = ch.eventchannel
                        if hasattr(ch, 'pressurechannel'):
                            objects["PressureChannel"] = ch.pressurechannel
                        if hasattr(ch, 'strainchannel'):
                            objects["StrainChannel"] = ch.strainchannel
                        if hasattr(ch, 'forcechannel'):
                            objects["ForceChannel"] = ch.forcechannel
                        if hasattr(ch, 'kinematicschannel'):
                            objects["KinematicsChannel"] = ch.kinematicschannel
                        if hasattr(ch, 'sonochannel'):
                            objects["SonoChannel"] = ch.sonochannel
                            objects["Sensor"] = ch.sonochannel.crystal1
                            objects["SonoSensor"] = ch.sonochannel.crystal1
                        if hasattr(ch, 'emgchannel'):
                            objects["Sensor"] = ch.emgchannel.sensor
                        for key, value in meta_selected.items():
                            if key in ('Setup', 'EmgSetup', 'SonoSetup', 'Sensor', 'EmgSensor',
                                       'SonoSensor', 'Channel', 'EmgChannel', 'SonoChannel',
                                       'PressureChannel', 'ForceChannel', 'StrainChannel',
                                       'KinematicsChannel', 'EventChannel'):
                                for v in value:
                                    s = ''
                                    if key in objects and objects[key] is not None:
                                        s = getattr(objects[key], v[0])
                                        if hasattr(s, 'split'):  # check if s is a string
                                            ss = s.split('\r\n')
                                            if len(ss) > 1:
                                                s = ' '.join(ss)
                                    values.append(s)
                        # Output the second crystal's sensor information for sono channels.
                        if hasattr(ch, 'sonochannel'):
                            objects["Sensor"] = ch.sonochannel.crystal2
                            objects["SonoSensor"] = ch.sonochannel.crystal2
                            for key, value in meta_selected.items():
                                if key in ('Sensor', 'SonoSensor'):
                                    for v in value:
                                        s = ''
                                        if key in objects:
                                            s = getattr(objects[key], v[0])
                                            if hasattr(s, 'split'):
                                                ss = s.split('\r\n')
                                                if len(ss) > 1:
                                                    s = ' '.join(ss)
                                        values.append(s)
                    metaWriter.writerow(values)
                f.close()
        #
        # Put data files into the tmp zip.
        #
        data_files = {}
        if download_choice == "1" or download_choice == "2":
            if channel_choice == "0":
                # Download all trial files.
                for trial in bucket.trials.all():
                    # Check that there is a data file.
                    if trial.data_file is not None and trial.data_file != "":
                        filename = "trial_%d.dat" % trial.id
                        full_filename = "%s/%s" % (settings.MEDIA_ROOT, trial.data_file)
                        data_files[filename] = full_filename
            else:
                # Download the selected channels only.
                filename = "channels.dat"
                full_filename = "%s/channels.dat" % tempdir
                filenames[filename] = full_filename
                f = open(full_filename, "w")
                metaWriter = UnicodeWriter(f, delimiter=delimiter_char, doublequote=False,
                                           escapechar='\\', quotechar=quotechar_char,
                                           quoting=csv.QUOTE_MINIMAL)
                metaWriter.writerow(channel_headers)
                trial_readers = {}
                total_trial_number = 0
                for trial in bucket.trials.all():
                    # Check that there is a data file.
                    if (trial.data_file is not None and trial.data_file != ""
                            and str(trial.id) in trials_download):
                        full_filename = "%s/%s" % (settings.MEDIA_ROOT, trial.data_file)
                        csvfile = open(full_filename, "rU")
                        dialect = csv.Sniffer().sniff(csvfile.read(1024))
                        csvfile.seek(0)
                        reader = csv.reader(csvfile, dialect)
                        trial_readers[str(trial.id)] = {"reader": reader, "hasmore": True,
                                                        "file": csvfile}
                        total_trial_number += 1
                rows = {}
                finished_file_number = 0
                while finished_file_number < total_trial_number:
                    rows.clear()
                    for key in trial_readers:
                        try:
                            if trial_readers[key]["hasmore"]:
                                row = trial_readers[key]["reader"].next()
                                rows[key] = row
                        except StopIteration:
                            finished_file_number += 1
                            trial_readers[key]["hasmore"] = False
                            trial_readers[key]["file"].close()
                    newrow = []
                    for ch in channel_download:
                        if ch[0] in rows:
                            if int(ch[1]) > len(rows[ch[0]]):
                                messages.error(request,
                                               "Error in channel lineup positions for trial: %s" % ch[0])
                                c = RequestContext(request, {'title': 'FeedDB Explorer'})
                                return render_to_response('explorer/base.html', c)
                            newrow.append(rows[ch[0]][int(ch[1]) - 1])
                        else:
                            newrow.append('')
                    metaWriter.writerow(newrow)
                f.close()
        response = send_zipfile(request, filenames, data_files, zipfile_name)
        for file, full_file in filenames.items():
            os.remove(full_file)
        os.rmdir(tempdir)
        return response
def translate_grids(gridxml='grid.xml', outputpath='.', userdir='.',
                    ignoregrids=[], ignorecells=[], blackliststyles=[],
                    outputwordlists=True, outputcsv=False, singlefile=False):
    '''Parse Grid.xml files recursively. Look up each word and replace it
    with the foreign word.

    Note: outputwordlists, outputcsv and singlefile were referenced in the
    original body but missing from its signature; they are added here with
    assumed defaults matching parse_grids().'''
    # Get the gridset name: walk the directories to find the Grids directory.
    for r, d, f in os.walk(userdir):
        if "Grids" in d:
            gridsetname = os.path.split(os.path.normpath(r))[1]
    # Outputting to Grid folders or another output folder? If no output
    # directory is specified, write into the Grid directories themselves.
    if outputpath == '.':
        outinplace = True
    else:
        outputpath = outputpath + '/'
        outinplace = False
    # Walk every directory, picking up only grid.xml files.
    for r, d, f in os.walk(userdir):
        page = os.path.split(r)[1]
        if page not in ignoregrids:
            for files in f:
                if files.endswith("grid.xml"):
                    pth = os.path.join(r, files)
                    if outinplace:
                        outputpath = r + '/'
                    parser = etree.XMLParser(strip_cdata=False)
                    tree = etree.parse(pth, parser)
                    # Does it have a licence key? (Bugger if it has.) If so the grid
                    # is licenced: don't try to read the pictures.
                    if tree.xpath(".//licencekey") == []:
                        readpictures = True
                    else:
                        readpictures = False
                    cells = tree.xpath(".//cell")
                    if not singlefile:
                        if outputwordlists:
                            wordlist = etree.Element("wordlist")
                        if outputcsv:
                            vocabWriter = UnicodeWriter(open(outputpath + page + '.csv', 'wb'),
                                                        delimiter=',', quotechar='"')
                    # Merge in data from any existing wordlist!
                    wordlistpath = os.path.join(os.path.dirname(pth), "wordlist.xml")
                    if os.path.isfile(wordlistpath):
                        # A wordlist exists for this grid, so add its data to the
                        # uber wordlist.
                        wordlistwordlist = etree.parse(wordlistpath)
                        root = wordlistwordlist.getroot()
                        for wordx in root.iterfind("word"):  # more efficient method?
                            if outputwordlists:
                                wordlist.append(wordx)  # how to make it CDATA?
                            if outputcsv:
                                vocabWriter.writerow([pth, "wordlist", "wordlist",
                                                      str(wordx.findtext("wordtext")),
                                                      str(wordx.findtext("picturefile"))])
                    for cell in cells:
                        tt = ''.join(cell.xpath("(.//caption)/text()"))
                        style = ''.join(cell.xpath(".//stylepreset/text()"))
                        # Check the parameter/id value for 'type', i.e. text being sent to
                        # the text bar. NOT PERFECT: we should grab the text sent to the
                        # text bar rather than the caption.
                        command_id = cell.xpath(".//id/text()")
                        # Only interested in text sent to the text bar or spoken directly.
                        if "type" in command_id or "speaknow" in command_id:
                            # (Test `tt != ''` instead to include all cells with a caption.)
                            if style not in blackliststyles:  # implement a whitelist too?
                                if tt not in ignorecells:
                                    if ''.join(cell.xpath(".//hidden/text()")) != '1':
                                        if outputwordlists:
                                            word = etree.SubElement(wordlist, "word")
                                        cellchildren = cell.getchildren()
                                        vocabtext = picture = ''
                                        for cellchild in cellchildren:
                                            # If the cell has a type or speak command, save its text(s).
                                            commands = cellchild.getchildren()
                                            for command in commands:
                                                id = command.find("id")
                                                if id is not None:
                                                    # (The original tested `id.text == "type" or "speaknow"`,
                                                    # which is always true; test both values explicitly.)
                                                    if id.text in ("type", "speaknow"):
                                                        parameters = command.findall("parameter")
                                                        for parameter in parameters:
                                                            if "1" in parameter.xpath(".//@index"):
                                                                # Grid adds squiggle characters when the output
                                                                # text contains a space; strip() ditches them.
                                                                vocabtext = parameter.text.strip()
                                                                if outputwordlists:
                                                                    wordtext = etree.SubElement(word, "wordtext")
                                                                    wordtext.text = etree.CDATA(vocabtext)
                                        # If the cell has a picture (symbol), save its path.
                                        # Potential for blank words if a cell has a symbol but no
                                        # text. What to do about this?
                                        picture = ''.join(cell.xpath(".//picture/text()"))
                                        if readpictures and picture != '':
                                            if outputwordlists:
                                                picturefile = etree.SubElement(word, "picturefile")
                                                picturefile.text = picture
                                        if outputcsv:
                                            vocabWriter.writerow([pth, cell.get('x'), cell.get('y'),
                                                                  vocabtext, picture])
                    if not singlefile:
                        if outinplace:
                            if outputwordlists:
                                # Writing multiple files into the Grid folders.
                                file_out = open(outputpath + 'wordlist.xml', 'wb')
                                file_out.write('<?xml version="1.0" encoding="UTF-8"?>'
                                               + etree.tostring(wordlist, pretty_print=True,
                                                                encoding='utf-8'))
                        else:
                            if outputwordlists:
                                # Writing multiple files into the output folder: make a folder
                                # per gridset and name the files by page.
                                try:
                                    os.mkdir(outputpath + '/' + gridsetname)
                                except OSError as e:
                                    if e.errno != errno.EEXIST:
                                        raise
                                file_out = open(outputpath + '/' + gridsetname + '/' + page + '.xml', 'wb')
                                file_out.write('<?xml version="1.0" encoding="UTF-8"?>'
                                               + etree.tostring(wordlist, pretty_print=True,
                                                                encoding='utf-8'))
            # (fragment: continues inside an enclosing loop and try block)
            title_words = set([stemmer.stemWord(word.lower())
                               for word in title.split() if word not in stopwords])
            if name_words == title_words:
                wiki_match, confidence = wikipedia_match(ann)
        except Exception as e:
            import pdb
            pdb.set_trace()
        if wiki_match:
            outfile_name = new_annotations
            if close_match:
                close_match_obj = close_match.split('/')[-1]
                wiki_match_obj = wiki_match.split('/')[-1]
                if close_match_obj != wiki_match_obj:
                    outfile_name = different_annotations
                else:
                    print name, 'already matched with:', close_match
                    continue
            with open(outfile_name, 'a+') as outfile:
                writer = UnicodeWriter(outfile)
                writer.writerow([subject_url, name, wiki_match, unicode(confidence)])