def downloadImages(xml):
    """Download every ``cnxtra:image`` found in *xml* and collect its content.

    For each image whose detected type is PNG, JPEG or GIF the element is
    updated in place: ``mime-type`` is set, a missing/empty ``alt`` is filled
    with the source URL, and the element text becomes the generated file name.
    Images of any other type are left untouched and not stored.

    Returns a tuple ``(xml, objects)`` where ``objects`` maps each generated
    file name to the downloaded content. Download errors are not caught here
    and propagate to the caller.
    """
    # Accepted image types and the file suffix used for each of them.
    extensions = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
    }
    objects = {}  # image contents will be saved here
    xpathImages = etree.XPath('//cnxtra:image', namespaces={'cnxtra': 'http://cnxtra'})

    for position, image in enumerate(xpathImages(xml)):
        strImageUrl = image.get('src')

        # Close the HTTP response explicitly instead of relying on garbage
        # collection to release the connection.
        response = urllib2.urlopen(strImageUrl)
        try:
            strImageContent = response.read()
        finally:
            response.close()

        # get Mime type from image
        strImageMime = magic.whatis(strImageContent)
        if strImageMime not in extensions:
            continue  # only the three formats above are allowed

        image.set('mime-type', strImageMime)
        # The number is the 1-based position among *all* images, so skipped
        # images leave gaps in the numbering (e.g. gd-0001.jpg, gd-0003.png).
        strImageName = "gd-%04d%s" % (position + 1, extensions[strImageMime])
        if not image.get('alt'):
            image.set('alt', strImageUrl)
        image.text = strImageName
        # add contents of image to object
        objects[strImageName] = strImageContent

    return xml, objects
def download_images(xml):
    """Download every ``cnxtra:image`` found in *xml* and collect its content.

    For each image whose detected type is PNG, JPEG or GIF the element is
    updated in place: ``mime-type`` is set and the element text becomes the
    generated file name. Images of any other type are left untouched and not
    stored.

    Returns a tuple ``(xml, objects)`` where ``objects`` maps each generated
    file name to the downloaded content.
    """
    # Accepted image types and the file suffix used for each of them.
    # Note: SVG is currently (2012-03-08) not supported by GDocs.
    extensions = {
        "image/jpeg": ".jpg",
        "image/png": ".png",
        "image/gif": ".gif",
    }
    objects = {}  # image contents will be saved here
    xpathImages = etree.XPath("//cnxtra:image", namespaces={"cnxtra": "http://cnxtra"})

    for position, image in enumerate(xpathImages(xml)):
        strImageUrl = image.get("src")
        print("Download GDoc Image: " + strImageUrl)  # Debugging output

        # TODO: a failed download (e.g. no network) still raises out of this
        # function. The former ``try: ... finally: pass`` wrapper never
        # handled that case, so it was removed rather than kept as a no-op.
        response = urllib2.urlopen(strImageUrl)
        try:
            strImageContent = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        # get Mime type from image
        strImageMime = magic.whatis(strImageContent)
        if strImageMime not in extensions:
            continue  # only the three formats above are allowed

        image.set("mime-type", strImageMime)
        # The number is the 1-based position among *all* images, so skipped
        # images leave gaps in the numbering (e.g. gd-0001.jpg, gd-0003.png).
        strImageName = "gd-%04d%s" % (position + 1, extensions[strImageMime])
        image.text = strImageName
        # add contents of image to object
        objects[strImageName] = strImageContent

    return xml, objects
def downloadImages(xml):
    """Download every ``cnxtra:image`` found in *xml* and collect its content.

    For each image whose detected type is PNG, JPEG or GIF the element is
    updated in place: ``mime-type`` is set, a missing/empty ``alt`` is filled
    with the source URL, and the element text becomes the generated file name.
    Images of any other type are left untouched and not stored.

    Returns a tuple ``(xml, objects)`` where ``objects`` maps each generated
    file name to the downloaded content. Download errors are not caught here
    and propagate to the caller.
    """
    # Accepted image types and the file suffix used for each of them.
    extensions = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
    }
    objects = {}  # image contents will be saved here
    xpathImages = etree.XPath('//cnxtra:image', namespaces={'cnxtra': 'http://cnxtra'})

    for position, image in enumerate(xpathImages(xml)):
        strImageUrl = image.get('src')

        # Close the HTTP response explicitly instead of relying on garbage
        # collection to release the connection.
        response = urllib2.urlopen(strImageUrl)
        try:
            strImageContent = response.read()
        finally:
            response.close()

        # get Mime type from image
        strImageMime = magic.whatis(strImageContent)
        if strImageMime not in extensions:
            continue  # only the three formats above are allowed

        image.set('mime-type', strImageMime)
        # The number is the 1-based position among *all* images, so skipped
        # images leave gaps in the numbering (e.g. gd-0001.jpg, gd-0003.png).
        strImageName = "gd-%04d%s" % (position + 1, extensions[strImageMime])
        if not image.get('alt'):
            image.set('alt', strImageUrl)
        image.text = strImageName
        # add contents of image to object
        objects[strImageName] = strImageContent

    return xml, objects
def download_images(xml):
    """Download every ``cnxtra:image`` found in *xml* and collect its content.

    For each image whose detected type is PNG, JPEG or GIF the element is
    updated in place: ``mime-type`` is set and the element text becomes the
    generated file name. Images of any other type are left untouched and not
    stored.

    Returns a tuple ``(xml, objects)`` where ``objects`` maps each generated
    file name to the downloaded content.
    """
    # Accepted image types and the file suffix used for each of them.
    # Note: SVG is currently (2012-03-08) not supported by GDocs.
    extensions = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
    }
    objects = {}  # image contents will be saved here
    xpathImages = etree.XPath('//cnxtra:image', namespaces={'cnxtra': 'http://cnxtra'})

    for position, image in enumerate(xpathImages(xml)):
        strImageUrl = image.get('src')
        print("Download GDoc Image: " + strImageUrl)  # Debugging output

        # TODO: a failed download (e.g. no network) still raises out of this
        # function. The former ``try: ... finally: pass`` wrapper never
        # handled that case, so it was removed rather than kept as a no-op.
        response = urllib2.urlopen(strImageUrl)
        try:
            strImageContent = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        # get Mime type from image
        strImageMime = magic.whatis(strImageContent)
        if strImageMime not in extensions:
            continue  # only the three formats above are allowed

        image.set('mime-type', strImageMime)
        # The number is the 1-based position among *all* images, so skipped
        # images leave gaps in the numbering (e.g. gd-0001.jpg, gd-0003.png).
        strImageName = "gd-%04d%s" % (position + 1, extensions[strImageMime])
        image.text = strImageName
        # add contents of image to object
        objects[strImageName] = strImageContent

    return xml, objects
def valid_scan_file(request):
    """Validate the optional ``file`` entry of ``request.POST``.

    Nothing happens when no ``file`` entry is present. Otherwise an error is
    recorded on ``request.errors`` when the entry is not a ``FieldStorage``
    or when its detected type is not in ``SUPPORTED_IMAGE_FORMATS``. The
    upload stream is rewound after it has been read for type detection.
    """
    if 'file' not in request.POST:
        return

    upload = request.POST['file']
    if not isinstance(upload, FieldStorage):
        request.errors.add('body', ERRORS.missing_file.name,
                           "The file parameter doesn't contain a file")
        return

    # The whole stream is consumed to detect the type, so rewind it for
    # whoever reads the upload next.
    stream = upload.file
    detected = whatis(stream.read())
    stream.seek(0)

    if detected in SUPPORTED_IMAGE_FORMATS:
        return
    request.errors.add('body', ERRORS.invalid_file.name,
                       "Unsupported file type")
def get_text(content):
    """Extract text from a file (currently handles PDFs and plain text).

    The content type is detected with ``magic.whatis``; content reported as
    ``'PDF document'`` goes through ``get_pdf_text``, anything else is
    assumed to already be text. The text is then stripped and every run of
    non-word characters is replaced by a single space.
    """
    # Named ``detected_type`` so the builtin ``type`` is not shadowed.
    detected_type = magic.whatis(content)
    logging.info('Magic type %s', detected_type)

    if detected_type == 'PDF document':
        text = get_pdf_text(content)
    else:
        # Assume it is text
        text = content

    # Clean up the text a bit. ``' '.join`` replaces ``string.join``, which
    # used the same single-space separator but no longer exists on Python 3;
    # the raw string keeps ``\W`` from being read as a string escape.
    text = text.strip()
    return ' '.join(re.split(r'\W+', text))
def downloadImages(xml, base_or_source_url='.'):
    """Download every ``cnxtra:image`` found in *xml* and collect its content.

    ``base_or_source_url`` is joined with each image ``src`` unless it is the
    default ``'.'``. For each image whose detected type is PNG, JPEG or GIF
    the element is updated in place: ``mime-type`` is set, a missing/empty
    ``alt`` becomes ``""`` and the element text becomes the generated file
    name.

    This is best effort: an image with an empty/missing ``src`` (or an empty
    base URL) is skipped with a warning, and any error while fetching or
    processing one image is reported with a warning and does not stop the
    remaining images.

    Returns a tuple ``(xml, objects)`` where ``objects`` maps each generated
    file name to the downloaded content.
    """
    # Accepted image types and the file suffix used for each of them.
    extensions = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
    }
    fetch_timeout = 3  # timeout in seconds for trying to get images

    objects = {}  # image contents will be saved here
    xpathImages = etree.XPath('//cnxtra:image', namespaces={'cnxtra': 'http://cnxtra'})
    imageList = xpathImages(xml)

    # A plain urllib2.urlopen() does not work for websites like e.g.
    # Wikipedia, hence the opener with a browser-like User-agent.
    image_opener = urllib2.build_opener()
    image_opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    for position, image in enumerate(imageList):
        strImageUrl = image.get('src')

        # Truthiness (instead of len() > 0) also covers a missing ``src``
        # attribute, which previously raised TypeError on len(None).
        if not (strImageUrl and base_or_source_url):
            print('Warning: image url or base url not valid! One image will be skipped!')
            continue

        if base_or_source_url != '.':
            # if we have a base url join this url strings
            strImageUrl = urljoin(base_or_source_url, strImageUrl)

        try:
            image_request = image_opener.open(strImageUrl, None, fetch_timeout)
            try:
                strImageContent = image_request.read()
            finally:
                # Release the connection even if reading fails.
                image_request.close()

            # get Mime type from image
            strImageMime = magic.whatis(strImageContent)
            # only allow the three formats listed in ``extensions``
            if strImageMime in extensions:
                image.set('mime-type', strImageMime)
                # 1-based position among *all* images, e.g. gd-0001.jpg
                strImageName = "gd-%04d%s" % (position + 1, extensions[strImageMime])
                if not image.get('alt'):
                    image.set('alt', "")
                image.text = strImageName
                # add contents of image to object
                objects[strImageName] = strImageContent
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # interpreter shutdown are not swallowed.
            # do nothing else if url could not be downloaded
            print('Warning: ' + strImageUrl + ' could not be downloaded.')

    return xml, objects
def downloadImages(xml, base_or_source_url='.'):
    """Download every ``cnxtra:image`` found in *xml* and collect its content.

    ``base_or_source_url`` is joined with each image ``src`` unless it is the
    default ``'.'``. For each image whose detected type is PNG, JPEG or GIF
    the element is updated in place: ``mime-type`` is set, a missing/empty
    ``alt`` becomes ``""`` and the element text becomes the generated file
    name.

    This is best effort: an image with an empty/missing ``src`` (or an empty
    base URL) is skipped with a warning, and any error while fetching or
    processing one image is reported with a warning and does not stop the
    remaining images.

    Returns a tuple ``(xml, objects)`` where ``objects`` maps each generated
    file name to the downloaded content.
    """
    # Accepted image types and the file suffix used for each of them.
    extensions = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
    }
    fetch_timeout = 3  # timeout in seconds for trying to get images

    objects = {}  # image contents will be saved here
    xpathImages = etree.XPath('//cnxtra:image', namespaces={'cnxtra': 'http://cnxtra'})
    imageList = xpathImages(xml)

    # A plain urllib2.urlopen() does not work for websites like e.g.
    # Wikipedia, hence the opener with a browser-like User-agent.
    image_opener = urllib2.build_opener()
    image_opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    for position, image in enumerate(imageList):
        strImageUrl = image.get('src')

        # Truthiness (instead of len() > 0) also covers a missing ``src``
        # attribute, which previously raised TypeError on len(None).
        if not (strImageUrl and base_or_source_url):
            print('Warning: image url or base url not valid! One image will be skipped!')
            continue

        if base_or_source_url != '.':
            # if we have a base url join this url strings
            strImageUrl = urljoin(base_or_source_url, strImageUrl)

        try:
            image_request = image_opener.open(strImageUrl, None, fetch_timeout)
            try:
                strImageContent = image_request.read()
            finally:
                # Release the connection even if reading fails.
                image_request.close()

            # get Mime type from image
            strImageMime = magic.whatis(strImageContent)
            # only allow the three formats listed in ``extensions``
            if strImageMime in extensions:
                image.set('mime-type', strImageMime)
                # 1-based position among *all* images, e.g. gd-0001.jpg
                strImageName = "gd-%04d%s" % (position + 1, extensions[strImageMime])
                if not image.get('alt'):
                    image.set('alt', "")
                image.text = strImageName
                # add contents of image to object
                objects[strImageName] = strImageContent
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # interpreter shutdown are not swallowed.
            # do nothing else if url could not be downloaded
            print('Warning: ' + strImageUrl + ' could not be downloaded.')

    return xml, objects
def TablesTreeClick(event):
    """Dump the table selected in ``tablestree`` into the main text area.

    Writes the table's field list, its total row count and one page of
    records (``rowsnumber`` rows starting at ``rowsoffset * rowsnumber``).
    Values of a column named ``data`` are passed to ``magic.whatis``; image
    data is rendered inline, anything else is shown through ``dump``.

    Passing ``event=None`` keeps the current page; any other value resets
    ``rowsoffset`` to the first page. Errors while reading the database are
    printed, not raised.
    """
    global rowsoffset, rowsnumber

    if event is not None:
        rowsoffset = 0
        recordlabelupdate()

    selection = tablestree.selection()
    if not selection:
        return

    seltable = selection[0]
    seltable_dbname = tablestree.set(seltable, 'filename')
    seltable_tablename = tablestree.set(seltable, 'tablename')

    # clears main text field
    clearmaintext()

    # table informations
    maintext(u'Dumping table: %s\nFrom file: %s' % (seltable_tablename, seltable_dbname))
    log(u'Dumping table %s from database %s.' % (seltable_tablename, seltable_dbname))

    if not os.path.exists(seltable_dbname):
        return

    seltabledb = sqlite3.connect(seltable_dbname)
    try:
        seltablecur = seltabledb.cursor()

        # NOTE(review): the table name is interpolated straight into the SQL
        # text (identifiers cannot be bound as parameters). Presumably it
        # comes from the database's own schema; confirm it is never
        # user-typed.
        # read selected table indexes
        seltablecur.execute(u'PRAGMA table_info(%s)' % seltable_tablename)
        seltable_fields = seltablecur.fetchall()

        # append table fields to main textarea
        seltable_fieldslist = []
        maintext(u'\n\nTable Fields:')
        for seltable_field in seltable_fields:
            maintext(u'\n- ')
            maintext(u'%i "%s" (%s)' % (seltable_field[0], seltable_field[1], seltable_field[2]))
            seltable_fieldslist.append(str(seltable_field[1]))

        # count rows of selected table
        seltablecur.execute('SELECT COUNT(*) FROM %s' % seltable_tablename)
        seltable_rownumber = seltablecur.fetchall()
        maintext("\n\nThe selected table has %s rows" % seltable_rownumber[0][0])

        limit = rowsnumber
        offset = rowsoffset * rowsnumber
        maintext("\nShowing %i rows from row %i." % (limit, offset))

        # read all fields from selected table
        seltablecur.execute("SELECT * FROM %s LIMIT %i OFFSET %i" % (seltable_tablename, limit, offset))

        try:
            # appends records to main text field
            maintext("\n\nTable Records:")

            # Emptied in place because ``photoImages`` is what keeps the Tk
            # images of the previous dump referenced.
            del photoImages[:]

            for seltable_record in seltablecur:
                maintext("\n- " + str(seltable_record))

                for i, col in enumerate(seltable_record):
                    try:
                        value = str(col)
                    except Exception:
                        value = col.encode("utf8", "replace") + " (decoded unicode)"

                    # maybe an image?
                    if seltable_fieldslist[i] == "data":
                        dataMagic = magic.whatis(value)
                        maintext("\n- Binary data: (%s)" % dataMagic)
                        if dataMagic.partition("/")[0] == "image":
                            im = Image.open(StringIO.StringIO(value))
                            tkim = ImageTk.PhotoImage(im)
                            photoImages.append(tkim)
                            maintext("\n ")
                            textarea.image_create(END, image=tkim)
                        else:
                            maintext("\n\n")
                            maintext(dump(value, 16, 1000))
                    else:
                        try:
                            maintext("\n- " + seltable_fieldslist[i] + " : " + value)
                        except Exception:
                            # The value could not be appended as text; show
                            # its detected type instead.
                            dataMagic = magic.whatis(value)
                            maintext("\n- " + seltable_fieldslist[i] + " (" + dataMagic + ")")

                maintext("\n---------------------------------------")
        except Exception:
            print("Unexpected error:", sys.exc_info())
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # interpreter shutdown are not swallowed.
        print("Unexpected error:", sys.exc_info())
    finally:
        # One close for every path instead of one per branch.
        seltabledb.close()
def import_file(sess, fname, fo, recv_from, bulk):
    """Import one mail file (MSG, PKT or a zip/rar bundle of PKTs).

    ``fname`` decides how the file is handled (via ``ismsg`` / ``ispkt`` /
    ``isbundle``). For bundles the archive type is detected from the first
    8192 bytes and every member must be a PKT, otherwise nothing is imported.

    ``fo`` must currently be ``None``: the file is opened from ``fname`` and
    closed again on exit. A truthy ``fo`` is rejected because rar bundles
    have to be reopened by name.

    Raises ``Exception`` for a truthy ``fo``, an unknown bundle type, a
    bundle containing a non-PKT member, or a file that is none of the
    supported kinds.
    """
    if fo:
        raise Exception("unable to use file object with rarfile")

    if fo is None:
        fo = open(fname, "rb")
        doclose = True
    else:
        doclose = False

    try:
        if ismsg(fname):
            print("msg")
            import_msg(sess, ftn.msg.MSG(fo), recv_from, bulk)

        elif ispkt(fname):
            print("pkt")
            # pktformat = ..get from database..
            import_pkt(sess, fo, recv_from, bulk)

        elif isbundle(fname):
            # Peek at the head of the file for type detection, then rewind
            # so the archive reader starts where the stream was before.
            sample = fo.read(8192)
            fo.seek(-len(sample), io.SEEK_CUR)
            mime = magic.whatis(sample)
            print("bundle (%s)" % mime)

            if mime == "application/zip":
                print("zip file")
                z = zipfile.ZipFile(fo)
            elif mime == "application/x-rar":
                print("rar file: reopening fname")
                z = rarfile.RarFile(fname)
            else:
                raise Exception("dont know how to unpack bundle %s" % fname)

            try:
                names = z.namelist()

                # Validate the whole bundle before importing any member.
                for zf in names:
                    if not ispkt(zf):
                        raise Exception("non-PKT file in bundle %s" % fname)

                for zf in names:
                    print(zf)
                    zfo = z.open(zf)
                    try:
                        import_pkt(sess, zfo, recv_from, bulk)
                    finally:
                        zfo.close()
            finally:
                # Close the archive even when a member fails to import.
                z.close()

        else:
            # The file name is interpolated here; the original message was
            # raised with a literal, unformatted "%s".
            raise Exception("file %s is not FIDO mail file" % fname)

        # NOTE: TIC files are not handled.
    finally:
        if doclose:
            fo.close()
def import_file(sess, fname, fo, recv_from, bulk):
    """Import one mail file (MSG, PKT or a zip/rar bundle of PKTs).

    ``fname`` decides how the file is handled (via ``ismsg`` / ``ispkt`` /
    ``isbundle``). For bundles the archive type is detected from the first
    8192 bytes and every member must be a PKT, otherwise nothing is imported.

    ``fo`` must currently be ``None``: the file is opened from ``fname`` and
    closed again on exit. A truthy ``fo`` is rejected because rar bundles
    have to be reopened by name.

    Raises ``Exception`` for a truthy ``fo``, an unknown bundle type, a
    bundle containing a non-PKT member, or a file that is none of the
    supported kinds.
    """
    if fo:
        raise Exception("unable to use file object with rarfile")

    if fo is None:
        fo = open(fname, "rb")
        doclose = True
    else:
        doclose = False

    try:
        if ismsg(fname):
            print("msg")
            import_msg(sess, ftn.msg.MSG(fo), recv_from, bulk)

        elif ispkt(fname):
            print("pkt")
            # pktformat = ..get from database..
            import_pkt(sess, fo, recv_from, bulk)

        elif isbundle(fname):
            # Peek at the head of the file for type detection, then rewind
            # so the archive reader starts where the stream was before.
            sample = fo.read(8192)
            fo.seek(-len(sample), io.SEEK_CUR)
            mime = magic.whatis(sample)
            print("bundle (%s)" % mime)

            if mime == "application/zip":
                print("zip file")
                z = zipfile.ZipFile(fo)
            elif mime == "application/x-rar":
                print("rar file: reopening fname")
                z = rarfile.RarFile(fname)
            else:
                raise Exception("dont know how to unpack bundle %s" % fname)

            try:
                names = z.namelist()

                # Validate the whole bundle before importing any member.
                for zf in names:
                    if not ispkt(zf):
                        raise Exception("non-PKT file in bundle %s" % fname)

                for zf in names:
                    print(zf)
                    zfo = z.open(zf)
                    try:
                        import_pkt(sess, zfo, recv_from, bulk)
                    finally:
                        zfo.close()
            finally:
                # Close the archive even when a member fails to import.
                z.close()

        else:
            # The file name is interpolated here; the original message was
            # raised with a literal, unformatted "%s".
            raise Exception("file %s is not FIDO mail file" % fname)

        # NOTE: TIC files are not handled.
    finally:
        if doclose:
            fo.close()
def updateTableDisplay(self):
    """Fill ``self.ui.tableContent`` with one page of the current table.

    Does nothing when no table is selected (``currentTableOnDisplay`` is
    ``None``) or when ``self.fileName`` does not exist. Otherwise the column
    headers are rebuilt from ``PRAGMA table_info`` and ``itemsPerScreen``
    rows starting at ``pageNumber * itemsPerScreen`` are loaded. Values of a
    column named ``data`` are passed to ``magic.whatis``: images are shown
    as icons, other data through ``self.dump``.

    Errors while reading the database are printed, not raised.
    """
    tableName = self.currentTableOnDisplay
    if tableName is None:
        return

    if not os.path.exists(self.fileName):
        return

    seltabledb = sqlite3.connect(self.fileName)
    try:
        seltablecur = seltabledb.cursor()

        # NOTE(review): the table name is interpolated straight into the SQL
        # text (identifiers cannot be bound as parameters); confirm it is
        # never user-typed.
        # read selected table indexes
        seltablecur.execute("PRAGMA table_info(%s)" % tableName)
        seltable_fields = seltablecur.fetchall()

        self.ui.tableContent.clear()
        self.ui.tableContent.setColumnCount(len(seltable_fields))

        # header (fields names)
        fieldsNames = []
        for index, record in enumerate(seltable_fields):
            try:
                value = str(record[1]) + "\n" + str(record[2])
            except Exception:
                value = record[1].encode("utf8", "replace") + " (decoded unicode)"
            newItem = QtGui.QTableWidgetItem(value)
            self.ui.tableContent.setHorizontalHeaderItem(index, newItem)
            fieldsNames.append(str(record[1]))

        firstRow = self.pageNumber * self.itemsPerScreen
        seltablecur.execute("SELECT * FROM %s LIMIT %i OFFSET %i" % (tableName, self.itemsPerScreen, firstRow))
        records = seltablecur.fetchall()

        self.ui.tableContent.setRowCount(len(records))
        self.ui.recordLabel.setText("Records %i-%i" % (firstRow + 1, (self.pageNumber + 1) * self.itemsPerScreen))

        for rowIndex, record in enumerate(records):
            for columnIndex, field in enumerate(record):
                try:
                    value = str(field)
                except Exception:
                    # NOTE(review): this retries the same str(field) call
                    # that just failed, so it presumably always ends in
                    # "Unreadable (data)"; field.encode(...) may have been
                    # intended. Left unchanged pending confirmation.
                    try:
                        value = str(field).encode("utf8", "replace") + " (decoded unicode)"
                    except Exception:
                        value = "Unreadable (data)"

                # maybe an image?
                if fieldsNames[columnIndex] == "data":
                    dataMagic = magic.whatis(value)
                    if dataMagic.partition("/")[0] == "image":
                        qba = QtCore.QByteArray()
                        qba.append(value)
                        qimg = QtGui.QImage.fromData(qba)
                        qpix = QtGui.QPixmap.fromImage(qimg)
                        qicon = QtGui.QIcon(qpix)
                        newItem = QtGui.QTableWidgetItem(dataMagic)
                        newItem.setIcon(qicon)
                        self.ui.tableContent.setRowHeight(rowIndex, 100)
                        self.ui.tableContent.setIconSize(QtCore.QSize(100, 100))
                    else:
                        text = self.dump(value, 16, 1000)
                        newItem = QtGui.QTableWidgetItem(text)
                else:
                    # not data => text
                    newItem = QtGui.QTableWidgetItem(value)

                self.ui.tableContent.setItem(rowIndex, columnIndex, newItem)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # interpreter shutdown are not swallowed.
        print("Unexpected error:", sys.exc_info())
    finally:
        seltabledb.close()

    self.ui.tableContent.resizeColumnsToContents()
    self.ui.tableContent.resizeRowsToContents()
def put(self, file_data, file_info, safe=False):
    """
    Store a new file on this server of the distributed file system.

    The content is written below ``self._files_dir`` in a directory derived
    from the first five hex digits of the MD5 of the file's tags and name.
    The index entry with the same hash is then replaced and the server's
    ``empty_space`` is reduced by ``file_info['size']``.

    @type file_data: C{str}
    @param file_data: Content of the file to store.

    @type file_info: C{dict}
    @param file_info: Dictionary with the metadata of the file. The keys
        read here are C{name}, C{tags}, C{description}, C{size}, C{owner},
        C{group}, C{perms} and, optionally, C{time}.

    @type safe: C{bool}
    @param safe: Whether the call is already "thread safe" or whether this
        method has to take care of the synchronisation itself. False by
        default, in which case the write lock is taken for the duration of
        the call.
    """
    if not safe:
        self._mrsw_lock.write_in()
    try:
        encoding = self._encoding
        file_name = file_info['name']
        tags = u' '.join([tag.decode(encoding) for tag in file_info['tags']])

        # Save the file in the files directory.
        # NOTE(review): md5() receives a unicode string here, which
        # presumably only works for ASCII-only tags/names; confirm.
        file_hash = hashlib.md5(tags + file_name.decode(encoding)).hexdigest()
        file_path = os.path.join(self._files_dir,
                                 os.path.sep.join(file_hash[0:5]),
                                 file_name)
        target_dir = os.path.dirname(file_path)
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        # Binary mode: the payload is arbitrary file content and must not
        # go through newline translation.
        with open(file_path, 'wb') as stored_file:
            stored_file.write(file_data)

        # Use the caller's modification time when it is given. The previous
        # test ``not time in file_info`` looked up the *name* ``time``
        # instead of the key ``'time'``, so a supplied time was never used.
        if 'time' not in file_info:
            mod_time = str(self._time_provider.get_time()).decode(encoding)
        else:
            mod_time = file_info['time'].decode(encoding)

        # Add the metadata of the file to the index; the stored path is
        # relative to the files directory.
        file_path = file_path[len(self._files_dir) + 1:]
        writer = self._index.writer()
        writer.delete_by_term('hash', file_hash.decode(encoding))
        writer.add_document(
            hash=file_hash.decode(encoding),
            tags=tags,
            description=file_info['description'].decode(encoding),
            name=file_name.decode(encoding),
            size=file_info['size'].decode(encoding),
            owner=file_info['owner'].decode(encoding),
            group=file_info['group'].decode(encoding),
            perms=file_info['perms'].decode(encoding),
            path=file_path.decode(encoding),
            type=magic.whatis(file_data),
            time=mod_time,
            action=u'add')
        writer.commit()

        # Update the empty space of this server.
        self._status['empty_space'] -= long(file_info['size'])
    finally:
        if not safe:
            self._mrsw_lock.write_out()
def put(self, file_data, file_info, safe=False):
    """
    Store a new file on this server of the distributed file system.

    The content is written below ``self._files_dir`` in a directory derived
    from the first five hex digits of the MD5 of the file's tags and name.
    The index entry with the same hash is then replaced and the server's
    ``empty_space`` is reduced by ``file_info['size']``.

    @type file_data: C{str}
    @param file_data: Content of the file to store.

    @type file_info: C{dict}
    @param file_info: Dictionary with the metadata of the file. The keys
        read here are C{name}, C{tags}, C{description}, C{size}, C{owner},
        C{group}, C{perms} and, optionally, C{time}.

    @type safe: C{bool}
    @param safe: Whether the call is already "thread safe" or whether this
        method has to take care of the synchronisation itself. False by
        default, in which case the write lock is taken for the duration of
        the call.
    """
    if not safe:
        self._mrsw_lock.write_in()
    try:
        encoding = self._encoding
        file_name = file_info['name']
        tags = u' '.join([tag.decode(encoding) for tag in file_info['tags']])

        # Save the file in the files directory.
        # NOTE(review): md5() receives a unicode string here, which
        # presumably only works for ASCII-only tags/names; confirm.
        file_hash = hashlib.md5(tags + file_name.decode(encoding)).hexdigest()
        file_path = os.path.join(self._files_dir,
                                 os.path.sep.join(file_hash[0:5]),
                                 file_name)
        target_dir = os.path.dirname(file_path)
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        # Binary mode: the payload is arbitrary file content and must not
        # go through newline translation.
        with open(file_path, 'wb') as stored_file:
            stored_file.write(file_data)

        # Use the caller's modification time when it is given. The previous
        # test ``not time in file_info`` looked up the *name* ``time``
        # instead of the key ``'time'``, so a supplied time was never used.
        if 'time' not in file_info:
            mod_time = str(self._time_provider.get_time()).decode(encoding)
        else:
            mod_time = file_info['time'].decode(encoding)

        # Add the metadata of the file to the index; the stored path is
        # relative to the files directory.
        file_path = file_path[len(self._files_dir) + 1:]
        writer = self._index.writer()
        writer.delete_by_term('hash', file_hash.decode(encoding))
        writer.add_document(
            hash=file_hash.decode(encoding),
            tags=tags,
            description=file_info['description'].decode(encoding),
            name=file_name.decode(encoding),
            size=file_info['size'].decode(encoding),
            owner=file_info['owner'].decode(encoding),
            group=file_info['group'].decode(encoding),
            perms=file_info['perms'].decode(encoding),
            path=file_path.decode(encoding),
            type=magic.whatis(file_data),
            time=mod_time,
            action=u'add')
        writer.commit()

        # Update the empty space of this server.
        self._status['empty_space'] -= long(file_info['size'])
    finally:
        if not safe:
            self._mrsw_lock.write_out()
def TablesTreeClick(event):
    """Dump the table selected in ``tablestree`` into the main text area.

    Writes the table's field list, its total row count and one page of
    records (``rowsnumber`` rows starting at ``rowsoffset * rowsnumber``).
    Values of a column named ``data`` are passed to ``magic.whatis``; image
    data is rendered inline, anything else is shown through ``dump``.

    Passing ``event=None`` keeps the current page; any other value resets
    ``rowsoffset`` to the first page. Errors while reading the database are
    printed, not raised.
    """
    global rowsoffset, rowsnumber

    if event is not None:
        rowsoffset = 0
        recordlabelupdate()

    selection = tablestree.selection()
    if not selection:
        return

    seltable = selection[0]
    seltable_dbname = tablestree.set(seltable, 'filename')
    seltable_tablename = tablestree.set(seltable, 'tablename')

    # clears main text field
    clearmaintext()

    # table informations
    maintext(u'Dumping table: %s\nFrom file: %s' % (seltable_tablename, seltable_dbname))
    log(u'Dumping table %s from database %s.' % (seltable_tablename, seltable_dbname))

    if not os.path.exists(seltable_dbname):
        return

    seltabledb = sqlite3.connect(seltable_dbname)
    try:
        seltablecur = seltabledb.cursor()

        # NOTE(review): the table name is interpolated straight into the SQL
        # text (identifiers cannot be bound as parameters). Presumably it
        # comes from the database's own schema; confirm it is never
        # user-typed.
        # read selected table indexes
        seltablecur.execute(u'PRAGMA table_info(%s)' % seltable_tablename)
        seltable_fields = seltablecur.fetchall()

        # append table fields to main textarea
        seltable_fieldslist = []
        maintext(u'\n\nTable Fields:')
        for seltable_field in seltable_fields:
            maintext(u'\n- ')
            maintext(u'%i "%s" (%s)' % (seltable_field[0], seltable_field[1], seltable_field[2]))
            seltable_fieldslist.append(str(seltable_field[1]))

        # count rows of selected table
        seltablecur.execute('SELECT COUNT(*) FROM %s' % seltable_tablename)
        seltable_rownumber = seltablecur.fetchall()
        maintext("\n\nThe selected table has %s rows" % seltable_rownumber[0][0])

        limit = rowsnumber
        offset = rowsoffset * rowsnumber
        maintext("\nShowing %i rows from row %i." % (limit, offset))

        # read all fields from selected table
        seltablecur.execute("SELECT * FROM %s LIMIT %i OFFSET %i" % (seltable_tablename, limit, offset))

        try:
            # appends records to main text field
            maintext("\n\nTable Records:")

            # Emptied in place because ``photoImages`` is what keeps the Tk
            # images of the previous dump referenced.
            del photoImages[:]

            for seltable_record in seltablecur:
                maintext("\n- " + str(seltable_record))

                for i, col in enumerate(seltable_record):
                    try:
                        value = str(col)
                    except Exception:
                        value = col.encode("utf8", "replace") + " (decoded unicode)"

                    # maybe an image?
                    if seltable_fieldslist[i] == "data":
                        dataMagic = magic.whatis(value)
                        maintext("\n- Binary data: (%s)" % dataMagic)
                        if dataMagic.partition("/")[0] == "image":
                            im = Image.open(StringIO.StringIO(value))
                            tkim = ImageTk.PhotoImage(im)
                            photoImages.append(tkim)
                            maintext("\n ")
                            textarea.image_create(END, image=tkim)
                        else:
                            maintext("\n\n")
                            maintext(dump(value, 16, 1000))
                    else:
                        try:
                            maintext("\n- " + seltable_fieldslist[i] + " : " + value)
                        except Exception:
                            # The value could not be appended as text; show
                            # its detected type instead.
                            dataMagic = magic.whatis(value)
                            maintext("\n- " + seltable_fieldslist[i] + " (" + dataMagic + ")")

                maintext("\n---------------------------------------")
        except Exception:
            print("Unexpected error:", sys.exc_info())
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C and
        # interpreter shutdown are not swallowed.
        print("Unexpected error:", sys.exc_info())
    finally:
        # One close for every path instead of one per branch.
        seltabledb.close()
def _detect_content_type(self):
    """Return what ``magic.whatis`` reports for the start of ``self.content``.

    Only the first ``MAGIC_FILE_BYTES`` of the content are read.
    """
    handle = self.content.open('r')
    leading_bytes = handle.read(MAGIC_FILE_BYTES)
    return magic.whatis(leading_bytes)