def _file_metadata(self, an_uri, file_descr):
    """Get metadata about the actual file and add it to the FileDescription.

    Stats ``an_uri`` and stores the owner name, group name, size and the
    ctime/mtime (as ISO-8601 strings) in ``file_descr.metadata``, creating
    that dict first when it is ``None``. Afterwards a best-effort attempt is
    made to add repository metadata; any failure there is only logged at
    debug level.

    :param an_uri: path of the file to stat.
    :param file_descr: FileDescription whose ``metadata`` is updated in place.
    """
    def convert_time(st_time):
        # Format the timestamp that was actually passed in. Previously the
        # argument was ignored and st_mtime was always used, so 'st_ctime'
        # silently reported the modification time.
        return datetime.fromtimestamp(st_time).isoformat()

    stat = os.stat(an_uri)

    if file_descr.metadata is None:
        file_descr.metadata = {}

    file_descr.metadata['pw_name'] = MetaDataValue(
        type=MetaDataType.STRING,
        string_value=pwd.getpwuid(stat.st_uid).pw_name)
    file_descr.metadata['gr_name'] = MetaDataValue(
        type=MetaDataType.STRING,
        string_value=grp.getgrgid(stat.st_gid).gr_name)
    file_descr.metadata['st_size'] = MetaDataValue(
        type=MetaDataType.INT,
        int_value=stat.st_size)
    file_descr.metadata['st_ctime'] = MetaDataValue(
        type=MetaDataType.STRING,
        string_value=convert_time(stat.st_ctime))
    file_descr.metadata['st_mtime'] = MetaDataValue(
        type=MetaDataType.STRING,
        string_value=convert_time(stat.st_mtime))

    # TODO: the repository location below is hard-coded.
    try:
        from damn_at.repository import Repository
        repo = Repository('/home/sueastside/dev/DAMN/damn-test-files')
        repo.get_meta_data(an_uri, file_descr)
    except Exception as repo_exception:
        # Repository data is optional; never fail the analysis because of it.
        logger.debug("Unable to extract repository information: %s",
                     str(repo_exception))
def analyze(self, an_uri):
    """Describe a text file as a single asset named 'content'.

    The asset's mimetype is guessed from the file name, and its metadata
    holds the number of lines ('lines') and the encoding reported by
    libmagic ('charset').

    :param an_uri: path of the text file to analyze.
    :returns: a FileDescription containing exactly one AssetDescription.
    """
    fileid = FileId(filename=os.path.abspath(an_uri))
    file_descr = FileDescription(file=fileid)
    file_descr.assets = []

    text_mimetype = mimetypes.guess_type(an_uri)[0]

    asset_descr = AssetDescription(asset=AssetId(
        subname='content',
        mimetype=text_mimetype,
        file=fileid
    ))

    # Use a context manager so the handle is closed deterministically; the
    # bare open() used before leaked the file object until garbage
    # collection.
    with open(an_uri) as text_file:
        num_lines = sum(1 for _ in text_file)

    with magic.Magic(flags=magic.MAGIC_MIME_ENCODING) as mm:
        charset = mm.id_filename(an_uri)

    asset_descr.metadata = {}
    asset_descr.metadata['lines'] = MetaDataValue(
        type=MetaDataType.INT,
        int_value=num_lines
    )
    asset_descr.metadata['charset'] = MetaDataValue(
        type=MetaDataType.STRING,
        string_value=charset
    )

    file_descr.assets.append(asset_descr)
    return file_descr
def analyze(self, an_uri):
    """Describe an image file as a single asset named 'main layer'.

    Runs ``exiftool`` on the file and collects the ``key: value`` pairs it
    prints from the 'MIME Type' line onwards (excluding 'MIME Type' and
    'Image Size' themselves). A mimetype-specific extractor turns those into
    typed metadata when one is registered; every remaining pair whose key is
    not already present is stored as a string under an 'exif-' prefixed key.

    :param an_uri: path of the image file to analyze.
    :returns: a FileDescription containing exactly one AssetDescription.
    :raises AnalyzerException: if exiftool cannot be started or exits with
        a non-zero return code.
    """
    def to_text(data):
        # Popen pipes yield bytes; str(bytes) would give "b'...'" with
        # escaped newlines on Python 3 and break the line parsing below.
        if isinstance(data, bytes):
            return data.decode('utf-8', 'replace')
        return data

    fileid = FileId(filename=os.path.abspath(an_uri))
    file_descr = FileDescription(file=fileid)
    file_descr.assets = []

    image_mimetype = mimetypes.guess_type(an_uri)[0]

    asset_descr = AssetDescription(asset=AssetId(
        subname='main layer',
        mimetype=image_mimetype,
        file=fileid))

    try:
        pro = subprocess.Popen(['exiftool', an_uri],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        out, err = pro.communicate()
    except OSError as e:
        msg = 'ImageAnalyzer failed %s:\n%s' % (an_uri, e)
        LOG.error(msg)
        # The original `raise e(msg)` tried to call the exception instance
        # and therefore raised a TypeError; raise the analyzer error that
        # the non-zero-exit path below also uses.
        raise AnalyzerException(msg)

    if pro.returncode != 0:
        msg = 'ImageAnalyzer failed %s with error code %d!:\n%s' % (
            an_uri, pro.returncode, to_text(err))
        LOG.error(msg)
        raise AnalyzerException(msg)

    meta = {}
    seen_mime_type = False
    for raw_line in to_text(out).strip().split('\n'):
        parts = raw_line.split(':', 1)
        if len(parts) == 1:
            parts = parts[0].split('=')
        parts = [part.strip() for part in parts]
        if parts[0] == 'MIME Type':
            seen_mime_type = True
        # Lines without any separator carry no value; skip them instead of
        # failing with an IndexError.
        if (seen_mime_type and len(parts) > 1
                and parts[0] not in ('MIME Type', 'Image Size')):
            meta[parts[0].lower().replace(' ', '_')] = parts[1]

    from damn_at.analyzers.image import metadata

    extractor_map = {
        'image/png': metadata.MetaDataPNG,
        'image/jpeg': metadata.MetaDataJPG,
        'image/x-ms-bmp': metadata.MetaDataBMP,
        'image/x-photoshop': metadata.MetaDataPSD,
        'application/x-xcf': metadata.MetaDataXCF,
    }

    if image_mimetype in extractor_map:
        asset_descr.metadata = extractor_map[image_mimetype].extract(meta)
    else:
        asset_descr.metadata = {}

    # Add the values the extractor did not already cover.
    for key, value in meta.items():
        if key not in asset_descr.metadata:
            asset_descr.metadata['exif-' + key] = MetaDataValue(
                type=MetaDataType.STRING,
                string_value=value)

    file_descr.assets.append(asset_descr)
    return file_descr
def get_meta_data(self, an_uri, file_descr):
    """Add git commit metadata to ``file_descr``.

    The values are read from the commit at the head of the ``master``
    branch of ``self.repo`` and from the ``origin`` remote's configured
    URL; ``an_uri`` is currently not used to narrow this down to the
    commit that touched the file.

    :param an_uri: path of the file being described (currently unused).
    :param file_descr: FileDescription whose ``metadata`` is updated in
        place; the dict is created when it is ``None``.
    :returns: the same ``file_descr``.
    """
    commit = self.repo.heads.master.commit
    # TODO: resolve the file's blob for per-file information, e.g.
    #   path = os.path.relpath(an_uri, self.path)
    #   blob = commit.tree/path

    # Guard against a description that has no metadata dict yet, mirroring
    # what _file_metadata does before it calls into the repository.
    if file_descr.metadata is None:
        file_descr.metadata = {}

    def string_value(text):
        return MetaDataValue(type=MetaDataType.STRING, string_value=text)

    file_descr.metadata['git.author.name'] = string_value(
        commit.author.name)
    file_descr.metadata['git.author.email'] = string_value(
        commit.author.email)
    file_descr.metadata['git.message'] = string_value(
        commit.message)
    file_descr.metadata['git.committer.name'] = string_value(
        commit.committer.name)
    file_descr.metadata['git.committer.email'] = string_value(
        commit.committer.email)
    file_descr.metadata['git.name_rev'] = string_value(
        commit.name_rev)
    file_descr.metadata['git.remotes.origin.url'] = string_value(
        self.repo.remotes.origin.config_reader.get('url'))

    return file_descr
def analyze(self, an_uri):
    """Describe a video file as a single asset named after the file.

    Runs ``exiftool`` on the file and collects the ``key: value`` pairs it
    prints after the 'MIME Type' line. 'Frame Rate' is stored under
    'video_frame_rate'. ``metadata.MetaDataExif`` turns the pairs into
    typed metadata; every remaining pair whose key is not already present
    is stored as a string under an 'Exif-' prefixed key.

    :param an_uri: path of the video file to analyze.
    :returns: a FileDescription containing exactly one AssetDescription,
        or ``False`` when exiftool cannot be started or exits non-zero.
    """
    fileid = FileId(filename=os.path.abspath(an_uri))
    file_descr = FileDescription(file=fileid)
    file_descr.assets = []

    video_mimetype = mimetypes.guess_type(an_uri)[0]

    asset_descr = AssetDescription(
        asset=AssetId(subname=os.path.basename(an_uri),
                      mimetype=video_mimetype,
                      file=fileid))

    try:
        pro = subprocess.Popen(['exiftool', an_uri],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        out, err = pro.communicate()
    except OSError as exc:
        # `out` and `err` are not bound when the process could not be
        # started, so report the exception itself.
        LOG.debug("VideoAnalyzer failed %s\n\t%s", an_uri, exc)
        return False

    if pro.returncode != 0:
        # Pass the values as logging arguments matched by placeholders;
        # extra arguments without placeholders make logging itself fail.
        LOG.debug("VideoAnalyzer failed %s with error code %d\n\t%s\n\t%s",
                  an_uri, pro.returncode, out, err)
        return False

    # Popen pipes yield bytes; decode before splitting on a str separator.
    if isinstance(out, bytes):
        out = out.decode('utf-8', 'replace')

    meta = {}
    seen_mime_type = False
    for raw_line in out.strip().split('\n'):
        parts = raw_line.split(':', 1)
        if len(parts) == 1:
            # Split the text itself; the original called .split on the list.
            parts = parts[0].split('=')
        parts = [part.strip() for part in parts]
        if parts[0] == 'MIME Type':
            seen_mime_type = True
            continue
        # Lines without any separator carry no value; skip them instead of
        # failing with an IndexError.
        if not seen_mime_type or len(parts) < 2:
            continue
        meta[parts[0].lower().replace(' ', '_')] = parts[1]
        if parts[0] == 'Frame Rate':
            meta['video_frame_rate'] = meta.pop('frame_rate')

    asset_descr.metadata = metadata.MetaDataExif.extract(meta)

    # Add the values the extractor did not already cover.
    for key, value in meta.items():
        if key not in asset_descr.metadata:
            asset_descr.metadata['Exif-' + key] = MetaDataValue(
                type=MetaDataType.STRING,
                string_value=value)

    file_descr.assets.append(asset_descr)
    return file_descr
def extract(cls, context):
    """Build a dict of MetaDataValue objects from the fields declared on cls.

    Every attribute of ``cls`` that is not a dunder and is not one of
    'extract', 'fields' or 'convert' is unpacked as a ``(type, function)``
    pair. The function is called with ``context``; a ``None`` result is
    skipped, anything else is passed through ``cls.convert`` and stored
    under the attribute's name.

    :param context: object handed to every field's function.
    :returns: dict mapping field name to MetaDataValue.
    """
    reserved = ['extract', 'fields', 'convert']
    collected = {}
    for attr_name in dir(cls):
        if attr_name.startswith('__') or attr_name in reserved:
            continue
        value_type, getter = getattr(cls, attr_name)
        # Keyword understood by MetaDataValue for this type, built from the
        # lower-cased type name plus '_value'.
        value_keyword = (
            MetaDataType._VALUES_TO_NAMES[value_type].lower() + '_value')
        raw_value = getter(context)
        if raw_value is None:
            # Ignore None values!
            continue
        arguments = {
            'type': value_type,
            value_keyword: cls.convert(value_type, raw_value),
        }
        collected[attr_name] = MetaDataValue(**arguments)
    return collected
def analyze(self, anURI):
    """Describe a sound file as a single asset named after the file.

    Runs ``sox --i`` on the file and collects the ``key: value`` (or
    ``key=value``) pairs it prints, skipping 'Input File' and 'Comment'.
    ``metadata.MetaDataSox`` turns the pairs into typed metadata; every
    remaining pair whose key is not already present is stored as a string
    under a 'Sox-' prefixed key.

    :param anURI: path of the sound file to analyze.
    :returns: a FileDescription containing exactly one AssetDescription,
        or ``False`` when sox cannot be started or exits non-zero.
    """
    fileid = FileId(filename=os.path.abspath(anURI))
    file_descr = FileDescription(file=fileid)
    file_descr.assets = []

    asset_descr = AssetDescription(
        asset=AssetId(subname=os.path.basename(anURI),
                      mimetype=mimetypes.guess_type(anURI, False)[0],
                      file=fileid))

    try:
        pro = subprocess.Popen(['sox', '--i', anURI],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        out, err = pro.communicate()
    except OSError as exc:
        # `out` and `err` are not bound when the process could not be
        # started, so report the exception itself.
        print(("E: SoundAnalyzer failed %s!" % anURI, exc))
        return False

    if pro.returncode != 0:
        print(("E: SoundAnalyzer failed %s with error code %d! "
               % (anURI, pro.returncode), out, err))
        return False

    meta = {}
    for raw_line in out.decode("utf-8").strip().split('\n'):
        parts = raw_line.split(':', 1)
        if len(parts) == 1:
            parts = parts[0].split('=')
        parts = [part.strip() for part in parts]
        if parts[0] in ['Input File', 'Comment']:
            continue
        # Lines without any separator (e.g. blank lines) carry no value;
        # skip them instead of failing with an IndexError.
        if len(parts) < 2:
            continue
        meta[parts[0].lower().replace(' ', '_')] = parts[1]

    asset_descr.metadata = metadata.MetaDataSox.extract(meta)

    # Add the non-default metadata the extractor did not already cover.
    for key, value in meta.items():
        if key not in asset_descr.metadata:
            asset_descr.metadata['Sox-' + key] = MetaDataValue(
                type=MetaDataType.STRING,
                string_value=value)

    file_descr.assets.append(asset_descr)
    return file_descr
def analyze(self, an_uri):
    """Describe the textures, materials and meshes of a file loaded by assimp.

    Each texture, material and mesh of the loaded scene becomes one asset
    with mimetype 'application/assimp' plus a '.texture', '.material' or
    '.mesh' suffix. Unnamed textures and meshes, and materials without a
    'name' property, get an index-based name. A mesh lists the asset of the
    material at its ``materialindex`` as a dependency when that material
    exists.

    :param an_uri: path of the file to analyze.
    :returns: a FileDescription with one AssetDescription per texture,
        material and mesh.
    """
    from damn_at.analyzers.mesh.metadata import (
        MetaDataAssimpTexture,
        MetaDataAssimpMesh
    )

    fileid = FileId(filename=os.path.abspath(an_uri))
    file_descr = FileDescription(file=fileid)
    file_descr.assets = []

    assimp_mimetype = 'application/assimp'

    scene = None
    try:
        scene = pyassimp.load(an_uri)

        for i, texture in enumerate(scene.textures):
            name = texture.name if texture.name else 'texture-' + str(i)
            asset_descr = AssetDescription(asset=AssetId(
                subname=name,
                mimetype=assimp_mimetype + ".texture",
                file=fileid
            ))
            asset_descr.metadata = MetaDataAssimpTexture.extract(texture)
            file_descr.assets.append(asset_descr)

        # Material assets by index, so meshes can reference them below.
        materials = {}
        for i, material in enumerate(scene.materials):
            properties = dict(material.properties.items())
            name = properties.get('name', 'material-' + str(i))
            asset_descr = AssetDescription(asset=AssetId(
                subname=name,
                mimetype=assimp_mimetype + ".material",
                file=fileid
            ))
            asset_descr.metadata = {}
            for key, value in properties.items():
                if key == 'name' or key == 'file':
                    continue
                asset_descr.metadata[key] = MetaDataValue(
                    type=MetaDataType.STRING,
                    string_value=str(value)
                )
            file_descr.assets.append(asset_descr)
            materials[i] = asset_descr

        for i, mesh in enumerate(scene.meshes):
            name = mesh.name if mesh.name else 'mesh-' + str(i)
            asset_descr = AssetDescription(asset=AssetId(
                subname=name,
                mimetype=assimp_mimetype + ".mesh",
                file=fileid
            ))
            asset_descr.metadata = MetaDataAssimpMesh.extract(mesh)
            asset_descr.dependencies = []
            # Dependencies
            if (mesh.materialindex is not None
                    and mesh.materialindex in materials):
                asset_descr.dependencies.append(
                    materials[mesh.materialindex].asset
                )
            file_descr.assets.append(asset_descr)
    finally:
        # Only release a scene that was actually loaded; releasing None
        # after a failed load could raise and hide the original error.
        if scene is not None:
            pyassimp.release(scene)

    # NOTE: a disabled Wavefront OBJ code path that was kept here as a bare
    # string literal (and therefore never executed) has been removed.

    return file_descr