def generate_thumbnail(doc):
    """Generate a JPEG thumbnail for a PDF Document.

    Runs ``pdftoppm`` on the Document's local file and saves the result to
    ``doc.thumbnail``.  Returns the thumbnail path, or None when the
    Document has no local file or is not a PDF.

    :param doc: a proposal.models.Document with a local file
    :raises Exception: when pdftoppm exits with a nonzero status
    """
    if not doc.document:
        task_logger.error(
            "Document has not been copied to the local filesystem")
        return

    # BUG FIX: the original bound this to a local named `path`, shadowing
    # the os.path alias, so path.join/path.dirname below blew up on a str.
    doc_path = doc.document.path
    # TODO: Dispatch on extension. Handle other common file types
    if extension(doc_path) != "pdf":
        return

    out_prefix = path.join(path.dirname(doc_path), "thumbnail")
    proc = subprocess.Popen(
        ["pdftoppm", "-jpeg", "-singlefile", "-scale-to", "200", doc_path,
         out_prefix],
        stderr=subprocess.PIPE)
    _, err = proc.communicate()
    if proc.returncode:
        raise Exception("Failed for document %s" % doc.pk, err)

    thumb_path = out_prefix + path.extsep + "jpg"
    with open(thumb_path, "rb") as thumb_file:
        doc.thumbnail.save("thumbnail.jpg", File(thumb_file))
    return thumb_path
def find_music_paths(root_dir, force=False): music_paths = [] other_paths = [] for root, dirnames, filenames in os.walk(root_dir): subroot = root[len(root_dir):].lstrip('/').split('/')[0] if subroot not in ('artists', 'compilations', 'corrupt', 'other') or force: matched_filenames = fnmatch.filter(filenames, '*.[mM][pP]3') #fnmatch.filter(filenames, '*.[mM]4[aA]') #matched_filenames = fnmatch.filter(filenames, '*Our Lives*') #if 'Elemental' in root: # matched_filenames = fnmatch.filter(filenames, '*.[mM][pP]3') #else: # matched_filenames = [] unmatched_filenames = set(filenames) - set(matched_filenames) music_paths.extend( [os.path.join(root, o) for o in matched_filenames]) other_paths.extend( [os.path.join(root, o) for o in unmatched_filenames]) other_extensions = set([utils.extension(o) for o in other_paths]) if other_extensions: print 'warning - found other file types: ', other_extensions return (music_paths, other_paths)
def fetch_document(doc, force=False):
    """Copy the given document (proposal.models.Document) to a local
    directory.

    Skips the download when a local copy already exists, unless `force`.
    """
    if not force and doc.document and os.path.exists(doc.document.path):
        return

    url = doc.url
    ext = extension(os.path.basename(parse.urlsplit(url).path))
    local_path = os.path.join(settings.MEDIA_ROOT, "doc", str(doc.pk),
                              "download.%s" % ext)

    # Ensure that the intermediate directories exist:
    os.makedirs(os.path.dirname(local_path), exist_ok=True)

    with request.urlopen(url) as resp, open(local_path, "wb") as out:
        shutil.copyfileobj(resp, out)

    doc.document = local_path
    published = files_metadata.published_date(local_path)
    if published:
        doc.published = published
    doc.save()
    return doc
def generate_thumbnail(doc):
    """Generate a Document thumbnail.

    Converts the first page of a local PDF to a 200px JPEG via pdftoppm
    and stores it on ``doc.thumbnail``.  Returns the thumbnail path, or
    None for non-PDF documents.
    """
    if not doc.document:
        raise FileNotFoundError(
            "Document has not been copied to the local filesystem")

    doc_path = doc.document.path
    # TODO: Dispatch on extension. Handle other common file types
    if extension(doc_path) != "pdf":
        return

    out_prefix = path.join(path.dirname(doc_path), "thumbnail")
    command = ["pdftoppm", "-jpeg", "-singlefile", "-scale-to", "200",
               doc_path, out_prefix]
    proc = subprocess.Popen(command, stderr=subprocess.PIPE)
    _, err = proc.communicate()
    if proc.returncode:
        raise Exception("Failed for document %s" % doc.pk, err)

    thumb_path = out_prefix + path.extsep + "jpg"
    with open(thumb_path, "rb") as thumb_file:
        doc.thumbnail.save("thumbnail.jpg", File(thumb_file))
    return thumb_path
def generate_doc_thumbnail(doc):
    """Generate a Document thumbnail.

    Renders a 200px JPEG of the Document's PDF with pdftoppm, stores the
    path on ``doc.thumbnail`` and saves the model.  Returns the thumbnail
    path, or None when there is no local file or it is not a PDF.

    :raises Exception: when pdftoppm fails
    """
    docfile = doc.document
    if not docfile:
        logger.error("Document has not been copied to the local filesystem")
        return

    path = docfile.name
    # TODO: Dispatch on extension. Handle other common file types
    if extension(path) != "pdf":
        # FIX: logger.warn is a deprecated alias of logger.warning.
        logger.warning("Document %s does not appear to be a PDF.", path)
        return

    out_prefix = os.path.join(os.path.dirname(path), "thumbnail")
    proc = subprocess.Popen(
        ["pdftoppm", "-jpeg", "-singlefile", "-scale-to", "200", path,
         out_prefix],
        stderr=subprocess.PIPE)
    _, err = proc.communicate()
    if proc.returncode:
        logger.error("Failed to generate thumbnail for document %s: %s",
                     path, err)
        raise Exception("Failed for document %s" % doc.pk)
    else:
        thumb_path = out_prefix + os.path.extsep + "jpg"
        logger.info("Generated thumbnail for Document #%i: '%s'",
                    doc.pk, thumb_path)
        doc.thumbnail = thumb_path
        doc.save()
        return thumb_path
def find_music_paths(root_dir,force=False): music_paths = [] other_paths = [] for root, dirnames, filenames in os.walk(root_dir): subroot = root[len(root_dir):].lstrip('/').split('/')[0] if subroot not in ('artists','compilations','corrupt','other') or force: matched_filenames = fnmatch.filter(filenames, '*.[mM][pP]3') #fnmatch.filter(filenames, '*.[mM]4[aA]') #matched_filenames = fnmatch.filter(filenames, '*Our Lives*') #if 'Elemental' in root: # matched_filenames = fnmatch.filter(filenames, '*.[mM][pP]3') #else: # matched_filenames = [] unmatched_filenames = set(filenames) - set(matched_filenames) music_paths.extend([os.path.join(root,o) for o in matched_filenames]) other_paths.extend([os.path.join(root,o) for o in unmatched_filenames]) other_extensions = set([utils.extension(o) for o in other_paths]) if other_extensions: print 'warning - found other file types: ',other_extensions return (music_paths,other_paths)
def generate_doc_thumbnail(doc):
    """Generate a Document thumbnail.

    Renders a 200px JPEG of the Document's PDF with pdftoppm, stores the
    path on ``doc.thumbnail`` and saves the model.  Returns the thumbnail
    path, or None when there is no local file or it is not a PDF.

    :raises Exception: when pdftoppm fails
    """
    docfile = doc.document
    if not docfile:
        logger.error("Document has not been copied to the local filesystem")
        return

    path = docfile.name
    # TODO: Dispatch on extension. Handle other common file types
    if extension(path) != "pdf":
        # FIX: logger.warn is a deprecated alias of logger.warning.
        logger.warning("Document %s does not appear to be a PDF.", path)
        return

    out_prefix = os.path.join(os.path.dirname(path), "thumbnail")
    proc = subprocess.Popen(
        ["pdftoppm", "-jpeg", "-singlefile", "-scale-to", "200", path,
         out_prefix],
        stderr=subprocess.PIPE)
    _, err = proc.communicate()
    if proc.returncode:
        logger.error("Failed to generate thumbnail for document %s: %s",
                     path, err)
        raise Exception("Failed for document %s" % doc.pk)
    else:
        thumb_path = out_prefix + os.path.extsep + "jpg"
        doc.thumbnail = thumb_path
        doc.save()
        return thumb_path
def _collect_data(path):
    """Collect records from every XML file in *path*, sorted by title."""
    records = []
    for name in os.listdir(path):
        # encoding dance to deal with surrogate characters from listdir
        name = name.encode('utf-8', 'surrogateescape').decode('utf-8')
        if utils.extension(name) == 'xml':
            records.extend(_extract_data(path, name))
    return sorted(records, key=itemgetter('title'))
def mkpath(_dir, artist, album, track, name, tag, compilation=False):
    """Build the destination path for a track under _dir.

    The file name is '<artist> - [<album> - ][<track> - ]<name>.<ext>';
    compilations go under 'compilations/<album>/', everything else under
    'artists/<artist>[/<album>]/'.

    FIX: the original duplicated the identical filename expression in both
    branches; it is now computed once.
    """
    ext = utils.extension(tag.linkedFile.name)
    filename = artist + ' - ' \
        + (album + ' - ' if album is not None else '') \
        + (unicode(track) + ' - ' if track is not None else '') \
        + name + "." + ext
    if compilation:
        args = ('compilations', album, filename)
    else:
        args = ('artists', artist, album, filename) if album \
            else ('artists', artist, filename)
    # Sanitize path components for the filesystem.
    args = utils.correct_path(args)
    return os.path.join(_dir, *args)
def finalize(self, dld, dest, title):
    # Move a completed download into `dest` under `title` and mark it as
    # processed.  Returns True on success, False on any failure.
    try:
        fullsrc = self.__get_fullpath(dld)
        # NOTE(review): assumes utils.extension returns the suffix
        # *including* its leading dot; if it returns e.g. "mp3" the dot is
        # missing from the destination name — TODO confirm against utils.
        fulldst = dest + '/' + title + utils.extension(dld.filename)
        os.rename(fullsrc, fulldst)
        dld.status = consts.STATUS_PROCESSED
        dld.save()
        return True
    except Exception as ex:
        # Broad best-effort catch: any failure (rename, model save) is
        # printed and the download stays in its previous state.
        print(ex)
        return False
def update_index_jinja(path):
    """Render index.html for *path* from the index.jinja template."""
    records = []
    for fname in os.listdir(path):
        if utils.extension(fname) == 'xml':
            records.extend(_extract_data(path, fname))
    records.sort(key=itemgetter('title'))

    template_dir = os.path.dirname(os.path.realpath(__file__))
    env = Environment(loader=FileSystemLoader(template_dir))
    template = env.get_template('index.jinja')
    # specify encoding explicitly, since the shell that git spawns us in does
    # not have a locale set, so open() would default to ANSI_X3.4-1968
    template.stream(recipes=records).dump(path + 'index.html', encoding='utf-8')
def fetch_image(image):
    """Download image.url into MEDIA_ROOT/image/ and save the model.

    Returns the updated image, or None when it has no URL.
    """
    url = image.url
    if url:
        components = parse.urlsplit(url)
        ext = extension(os.path.basename(components.path))
        filename = "image_%s.%s" % (image.pk, ext)
        path = os.path.join(settings.MEDIA_ROOT, "image", filename)
        # FIX: ensure the target directory exists (fetch_document does the
        # same); without this, open() fails on a fresh MEDIA_ROOT.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with request.urlopen(url) as resp, open(path, "wb") as out:
            shutil.copyfileobj(resp, out)
        image.document = path
        image.save()
        return image
def rename_document(doc):
    """Rename a Document's local file to download.<ext> via do_rename.

    Logs and returns on failure; no exception escapes.
    """
    docpath = doc.document and doc.document.path
    if not docpath:
        logger.error("Document %s has no local file" % doc.pk)
        return

    docdir, docname = os.path.split(docpath)
    ext = extension(docname)
    new_path = os.path.join(docdir, "download.%s" % ext)
    try:
        do_rename(doc, new_path)
    except Exception as err:
        # FIX: the original passed `err` as a stray positional logging
        # argument with no %s placeholder, so it was never rendered.
        logger.error("Error while attempting rename: %s", err)
        return
def save_from_url(doc, url, filename_base=None):
    """
    Downloads the document at `url` and saves it locally, storing the path
    in the given Document.

    :param doc: a Document model
    :param url: URL string
    :param filename_base: optional subpath specifying where to save the
    document

    Returns a tuple: (success, status_code, updated)
    """
    filename = path.basename(parse.urlsplit(url).path)
    if filename_base:
        filename = "{}.{}".format(filename_base, extension(filename))

    exists = doc.document and path.exists(doc.document.path)
    headers = {}
    if exists:
        # Only fetch when the remote copy is newer than what we have.
        headers["If-Modified-Since"] = doc.published.strftime(
            "%a, %d %b %Y %H:%M:%S GMT")

    with requests.get(url, headers=headers, stream=True) as response:
        if not response:
            return (False, response.status_code, response.reason)
        if response.status_code == 304:
            # Local copy is already current.
            return (True, response.status_code, False)

        doc.document.save(filename, File(response.raw), save=False)
        file_published = files.published_date(doc.document.path)
        if file_published:
            doc.published = file_published
        elif "Last-Modified" in response.headers:
            doc.published = dt_parse(response.headers["Last-Modified"])
        doc.save()
        return (True, response.status_code, exists)
def fetch_document(doc_id):
    """Copy the given document (proposal.models.Document) to a local
    directory.

    Looks the Document up by primary key, skips the download when the local
    copy is still current, and returns the Document's pk.
    """
    doc = Document.objects.get(pk=doc_id)
    url = doc.url

    if doc.document and os.path.exists(doc.document.path):
        # Has the document been updated?
        updated = proposal_utils.last_modified(doc.url)
        # No? Then we're good.
        if not updated or updated <= doc.published:
            return doc.pk
        # TODO Special handling of updated documents

    ext = extension(os.path.basename(parse.urlsplit(url).path))
    path = os.path.join(settings.MEDIA_ROOT, "doc", str(doc.pk),
                        "download.%s" % ext)
    # Ensure that the intermediate directories exist:
    os.makedirs(os.path.dirname(path), exist_ok=True)

    logger.info("Fetching Document #%i", doc.pk)
    with request.urlopen(url) as resp, open(path, "wb") as out:
        shutil.copyfileobj(resp, out)
    doc.document = path
    logger.info("Copied Document #%i to %s", doc.pk, path)

    file_published = files_metadata.published_date(path)
    if file_published:
        doc.published = file_published
    elif "Last-Modified" in resp.headers:
        # Response headers remain readable after the with-block closes resp.
        doc.published = dt_parse(resp.headers["Last-Modified"])

    doc.save()
    return doc.pk
def mkpath(_dir, artist, album, track, name, tag, compilation=False):
    """Build the destination path for a track under _dir.

    The file name is '<artist> - [<album> - ][<track> - ]<name>.<ext>';
    compilations go under 'compilations/<album>/', everything else under
    'artists/<artist>[/<album>]/'.

    FIX: the original duplicated the identical filename expression in both
    branches; it is now computed once.
    """
    ext = utils.extension(tag.linkedFile.name)
    filename = artist + ' - ' \
        + (album + ' - ' if album is not None else '') \
        + (unicode(track) + ' - ' if track is not None else '') \
        + name + "." + ext
    if compilation:
        args = ('compilations', album, filename)
    else:
        args = ('artists', artist, album, filename) if album \
            else ('artists', artist, filename)
    # Sanitize path components for the filesystem.
    args = utils.correct_path(args)
    return os.path.join(_dir, *args)
def update_index_simple(path):
    """ write index.html in/for path """
    root = etree.Element('html')
    head = etree.SubElement(root, 'head')
    etree.SubElement(head, 'title').text = 'Recipes'
    meta = etree.SubElement(head, 'meta')
    meta.attrib['http-equiv'] = "Content-Type"
    meta.attrib['content'] = "text/html;charset=utf-8"
    body = etree.SubElement(root, 'body')

    # Append one section per recipe XML file found in the directory.
    for entry in os.listdir(path):
        if utils.extension(entry) == 'xml':
            _append_recipes(body, path, entry)

    with open(path + 'index.html', 'wb') as out:
        out.write(b'<!DOCTYPE HTML>')
        etree.ElementTree(root).write(out, encoding='utf-8')
def upload_file():
    # Flask endpoint: accept an uploaded dataset file and stash it in
    # UPLOAD_FOLDER as 'temp.<ext>' for later prediction.
    global UPLOAD_NAME, __predicted, __lock
    # NOTE(review): busy-wait spin on a plain bool — burns CPU and provides
    # no real mutual exclusion (no atomicity); consider threading.Lock.
    while __lock:
        pass
    file = flask.request.files['file']
    if not utils.allowed_file(file.filename):
        # Reject anything that isn't an allowed tabular format.
        return flask.make_response(
            flask.jsonify({
                'error':
                'File type not allowed. Please upload a file in CSV, TSV or TXT format.'
            }), 400)
    # NOTE(review): a single module-global upload name means concurrent
    # uploads overwrite each other — presumably single-user; verify.
    UPLOAD_NAME = 'temp.' + utils.extension(file.filename)
    file.save(os.path.join(UPLOAD_FOLDER, UPLOAD_NAME))
    # Invalidate any previously computed prediction for the old upload.
    __predicted = False
    return flask.jsonify({'error': None})
def save_from_url(doc, url, filename_base=None):
    """Download `url` into doc.document and update its published date.

    :param doc: a Document model
    :param url: URL string
    :param filename_base: optional base name for the stored file

    Returns the updated (and saved) Document.
    """
    filename = path.basename(parse.urlsplit(url).path)
    if filename_base:
        filename = "{}.{}".format(filename_base, extension(filename))

    with request.urlopen(url) as resp:
        doc.document.save(filename, File(resp), save=False)

    # BUG FIX: the original called files_metadata.published_date(path),
    # which passed the os.path module object, not the saved file's path.
    file_published = files_metadata.published_date(doc.document.path)
    if file_published:
        doc.published = file_published
    elif "Last-Modified" in resp.headers:
        # Headers remain readable after the with-block closes resp.
        doc.published = dt_parse(resp.headers["Last-Modified"])

    doc.save()
    return doc
def save_from_url(doc, url, filename_base=None):
    """
    Downloads the document at `url` and saves it locally, storing the path
    in the given Document.

    :param doc: a Document model
    :param url: URL string
    :param filename_base: optional subpath specifying where to save the document

    Returns a tuple: (success, status_code, updated)
    """
    url_filename = path.basename(parse.urlsplit(url).path)
    filename = ("{}.{}".format(filename_base, extension(url_filename))
                if filename_base else url_filename)

    already_fetched = doc.document and path.exists(doc.document.path)
    request_headers = (
        {"If-Modified-Since":
         doc.published.strftime("%a, %d %b %Y %H:%M:%S GMT")}
        if already_fetched else {})

    with requests.get(url, headers=request_headers, stream=True) as response:
        if not response:
            return (False, response.status_code, response.reason)

        if response.status_code == 304:
            # Remote copy unchanged since the last fetch.
            return (True, response.status_code, False)

        doc.document.save(filename, File(response.raw), save=False)
        modified = files.published_date(doc.document.path)
        if modified:
            doc.published = modified
        elif "Last-Modified" in response.headers:
            doc.published = dt_parse(response.headers["Last-Modified"])
        doc.save()
        return (True, response.status_code, already_fetched)
def update_index_simple(path):
    """ write index.html in/for path """
    root = etree.Element('html')
    head = etree.SubElement(root, 'head')
    etree.SubElement(head, 'title').text = 'Recipes'
    meta = etree.SubElement(head, 'meta')
    meta.attrib['http-equiv'] = "Content-Type"
    meta.attrib['content'] = "text/html;charset=utf-8"
    body = etree.SubElement(root, 'body')

    for raw_name in os.listdir(path):
        # encoding dance to deal with surrogate characters from listdir
        name = raw_name.encode('utf-8', 'surrogateescape').decode('utf-8')
        if utils.extension(name) == 'xml':
            _append_recipes(body, path, name)

    with open(path + 'index.html', 'wb') as out:
        out.write(b'<!DOCTYPE HTML>')
        etree.ElementTree(root).write(out, encoding='utf-8')
def normalize_document_names():
    "Rename all local Documents to have names of the form download.<ext>."
    for doc in Document.objects.all():
        docpath = doc.document and doc.document.path
        if not docpath:
            continue

        docdir, docname = os.path.split(docpath)
        ext = extension(docname)
        newpath = os.path.join(docdir, "download.%s" % ext)
        try:
            os.rename(docpath, newpath)
        except Exception as err:
            # FIX: err was passed as a stray positional logging argument
            # with no %s placeholder, so it was never rendered.
            logger.error("Error while attempting rename: %s", err)
            # NOTE(review): aborts the whole run on the first failed rename
            # (original behavior, preserved) — confirm this is intended
            # rather than `continue`.
            return

        try:
            doc.document = newpath
            doc.save()
        except Exception as err:
            # Roll the file rename back so DB and filesystem stay
            # consistent, and log instead of swallowing the failure.
            logger.error("Error while saving Document %s: %s", doc.pk, err)
            os.rename(newpath, docpath)
def print_src_lists(mutatees, platform, info, directory):
    """Write srclists.cmake: one SOURCE_LIST_<n> variable per unique source
    list, plus per-source COMPILE_DEFINITIONS/FLAGS properties.

    We want to build the list of sources for each mutatee; since each mutatee
    gets compiled a bunch of different ways (32/64, no/low/high optimization,
    static/dynamic, per-compiler) but the sources stay the same, we can
    greatly simplify the output cmake file by making variables for each
    unique source list.  However, the input "mutatees" exhaustively
    enumerates the entire list of mutatees, so we iterate over everything and
    build a map of sources -> mutatees that care about them.

    Since CMake expects everything to be in its current directory, we output
    the sources with relative paths; make sure this agrees with the
    subdirectory structure for CMakeLists as defined in
    print_compiler_cmakefiles.
    """
    # FIX: use a with-block so the output file is closed even on error.
    with open("%s/srclists.cmake" % directory, "w") as out:
        out.write("set (SRC ${PROJECT_SOURCE_DIR}/src)\n")
        out.write("find_package (Dyninst REQUIRED COMPONENTS common OPTIONAL_COMPONENTS symtabAPI dyninstAPI instructionAPI proccontrol)\n")

        srcs_to_mutatees = {}
        preproc_to_mutatees = {}
        for m in mutatees:
            collected_srcs = ['\t${SRC}/mutatee_driver.c']
            # If it's a group mutatee we need to add the generated group file
            if is_groupable(m, info) == '1':
                collected_srcs.append('\t${PROJECT_SOURCE_DIR}/%s/%s_group.c\n'
                                      % (platform['name'], m['name']))
            # Preprocessed == module specific, apparently
            for s in m['preprocessed_sources']:
                collected_srcs.append('\t${SRC}/%s/%s\n' % (m['module'], s))
                collected_srcs.append(" ")
                preproc_to_mutatees.setdefault(s, m)
            # Raw == generic
            for s in m['raw_sources']:
                collected_srcs.append('\t${SRC}/%s\n' % s)
                collected_srcs.append(" ")
            key = ''.join(collected_srcs)
            srcs_to_mutatees.setdefault(key, []).append(m)
            m['srclist'] = key

        # Create CMake lists mirroring the map so we can reference a list
        # instead of reiterating it.  This is actually an important step;
        # the old mutatee Makefile for x86_64-linux was ~7M.  I'm aiming for
        # 1M for the CMake file.
        srcs_to_vars = dict()
        i = 0
        for s, mlist in srcs_to_mutatees.iteritems():
            out.write("set (SOURCE_LIST_%d \n%s)\n" % (i, s))
            for m in mlist:
                m['srclist_index'] = i
            srcs_to_vars[s] = i
            i += 1

        for s, m in preproc_to_mutatees.iteritems():
            groupable = is_groupable(m, info)
            module = m['module']
            # FIX: hoisted the repeated utils.extension(s) calls and replaced
            # the bitwise `|` on booleans with a membership test.
            ext = utils.extension(s)
            if ext in (".c", ".C"):
                # Figure out the test name from the source file...
                basename = s[0:-len('_mutatee') - len(ext)]
                out.write("set_property (SOURCE ${SRC}/%s/%s APPEND PROPERTY COMPILE_DEFINITIONS TEST_NAME=%s)\n" % (module, s, basename))
                out.write("set_property (SOURCE ${SRC}/%s/%s APPEND PROPERTY COMPILE_DEFINITIONS GROUPABLE=%s)\n" % (module, s, groupable))
            elif ext == ".asm" and "nasm_asm" in platform["auxilliary_compilers"]:
                out.write("set_property (SOURCE ${SRC}/%s/%s APPEND PROPERTY COMPILE_FLAGS -dPLATFORM=%s)\n" % (module, s, platform["name"]))
            # Skip raw sources; they don't need the GROUPABLE and TEST_NAMEs set
def extension(path, *_):
    """Return *path*'s file extension lower-cased; extra args are ignored."""
    ext = utils.extension(path)
    return ext.lower()
def print_src_lists(mutatees, platform, info, directory):
    """Write srclists.cmake: one SOURCE_LIST_<n> variable per unique source
    list, plus per-source COMPILE_DEFINITIONS/FLAGS properties.

    We want to build the list of sources for each mutatee; since each mutatee
    gets compiled a bunch of different ways (32/64, no/low/high optimization,
    static/dynamic, per-compiler) but the sources stay the same, we can
    greatly simplify the output cmake file by making variables for each
    unique source list.  However, the input "mutatees" exhaustively
    enumerates the entire list of mutatees, so we iterate over everything and
    build a map of sources -> mutatees that care about them.

    Since CMake expects everything to be in its current directory, we output
    the sources with relative paths; make sure this agrees with the
    subdirectory structure for CMakeLists as defined in
    print_compiler_cmakefiles.
    """
    # FIX: use a with-block so the output file is closed even on error.
    with open("%s/srclists.cmake" % directory, "w") as out:
        out.write("set (SRC ${PROJECT_SOURCE_DIR}/src)\n")

        srcs_to_mutatees = {}
        preproc_to_mutatees = {}
        for m in mutatees:
            collected_srcs = ['\t${SRC}/mutatee_driver.c']
            # If it's a group mutatee we need to add the generated group file
            if is_groupable(m, info) == '1':
                collected_srcs.append('\t${PROJECT_SOURCE_DIR}/%s/%s_group.c\n'
                                      % (platform['name'], m['name']))
            # Preprocessed == module specific, apparently
            for s in m['preprocessed_sources']:
                collected_srcs.append('\t${SRC}/%s/%s\n' % (m['module'], s))
                collected_srcs.append(" ")
                preproc_to_mutatees.setdefault(s, m)
            # Raw == generic
            for s in m['raw_sources']:
                collected_srcs.append('\t${SRC}/%s\n' % s)
                collected_srcs.append(" ")
            key = ''.join(collected_srcs)
            srcs_to_mutatees.setdefault(key, []).append(m)
            m['srclist'] = key

        # Create CMake lists mirroring the map so we can reference a list
        # instead of reiterating it.  This is actually an important step;
        # the old mutatee Makefile for x86_64-linux was ~7M.  I'm aiming for
        # 1M for the CMake file.
        srcs_to_vars = dict()
        i = 0
        for s, mlist in srcs_to_mutatees.iteritems():
            out.write("set (SOURCE_LIST_%d \n%s)\n" % (i, s))
            for m in mlist:
                m['srclist_index'] = i
            srcs_to_vars[s] = i
            i += 1

        for s, m in preproc_to_mutatees.iteritems():
            groupable = is_groupable(m, info)
            module = m['module']
            # FIX: hoisted the repeated utils.extension(s) calls and replaced
            # the bitwise `|` on booleans with a membership test.
            ext = utils.extension(s)
            if ext in (".c", ".C"):
                # Figure out the test name from the source file...
                basename = s[0:-len('_mutatee') - len(ext)]
                out.write("set_property (SOURCE ${SRC}/%s/%s APPEND PROPERTY COMPILE_DEFINITIONS TEST_NAME=%s)\n" % (module, s, basename))
                out.write("set_property (SOURCE ${SRC}/%s/%s APPEND PROPERTY COMPILE_DEFINITIONS GROUPABLE=%s)\n" % (module, s, groupable))
            elif ext == ".asm" and "nasm_asm" in platform["auxilliary_compilers"]:
                out.write("set_property (SOURCE ${SRC}/%s/%s APPEND PROPERTY COMPILE_FLAGS -dPLATFORM=%s)\n" % (module, s, platform["name"]))
            # Skip raw sources; they don't need the GROUPABLE and TEST_NAMEs set
# Fragment of a dispatch ("multimethod") class body — the class header is
# not visible in this view, so the code is left untouched.
# `add` registers a handler for a dispatch value (directly, or by returning
# an inner setter when no fn is given); `default` is a property that returns
# a setter for the fallback handler; `__call__` routes on dispatch_fn's
# result, falling back to default_fn or raising ValueError.
# NOTE(review): when `add` is used as a decorator the inner adder returns
# None, so the decorated name would be rebound to None — confirm intended
# usage before relying on it as a decorator.
# The module-level tail builds a `published_date` multimethod keyed on the
# lower-cased file extension, with a default of None.
def add(self, dispatch_val, fn=None): def adder(fn): self.table[dispatch_val] = fn if fn: adder(fn) else: return adder @property def default(self): def adder(fn): self.default_fn = fn return adder def __call__(self, *args): val = self.dispatch_fn(*args) if val in self.table: return self.table[val](*args) elif self.default_fn: return self.default_fn(*args) else: raise ValueError("Unknown dispatch value:", val) published_date = multimethod(lambda path: extension(path).lower()) published_date.default(lambda _: None)
# Fragment of a Python 2 music-library sync script.  The enclosing `if`
# branch opens before this view (the leading copytree calls belong to a
# "copy" branch; the `elif not force and force_move:` branch moves the
# sorted trees instead).  `copy_others` then relocates non-music files
# under other/<ext>/, collapsing a doubled '/other/<ext>/other/<ext>'
# segment.  The trailing accumulators and `for` header belong to a
# processing loop that continues past this view — left verbatim.
shutil.copytree(os.path.join(music_dir,'corrupt'),os.path.join(output_music_dir,'corrupt')) print 'copying other' shutil.copytree(os.path.join(music_dir,'other'),os.path.join(output_music_dir,'other')) elif not force and force_move: print 'moving artists' shutil.move(os.path.join(music_dir,'artists'),os.path.join(output_music_dir,'artists')) print 'moving compilations' shutil.move(os.path.join(music_dir,'compilations'),os.path.join(output_music_dir,'compilations')) print 'moving corrupt' shutil.move(os.path.join(music_dir,'corrupt'),os.path.join(output_music_dir,'corrupt')) print 'moving other' shutil.move(os.path.join(music_dir,'other'),os.path.join(output_music_dir,'other')) if copy_others: for other_path in other_paths: extension = utils.extension(other_path) (basename,filename) = os.path.split(other_path) music_file_output = os.path.join(output_music_dir,'other',extension,basename[len(music_dir)+1:],filename) music_file_output = music_file_output.replace('/other/%s/other/%s' % (extension,extension),'/other/%s'% (extension,)) utils.copy(other_path,music_file_output) print 'output: %s' % (music_file_output,) corrupt_music_paths = [] missing_tags_paths = [] guessed_missing_tags_paths = [] #music_paths = list(reversed(music_paths)) #music_paths = music_paths[:30] for (i,music_file) in enumerate(music_paths):
# Fragment of a Python 2 music-library sync script (formatted variant of
# the fragment above in the original repository).  The enclosing branch
# header opens before this view: these `shutil.move` calls relocate the
# sorted artists/compilations/corrupt/other trees, then `copy_others`
# relocates non-music files under other/<ext>/, collapsing a doubled
# '/other/<ext>/other/<ext>' segment.  The trailing accumulators feed a
# processing loop outside this view — left verbatim.
print 'moving artists' shutil.move(os.path.join(music_dir, 'artists'), os.path.join(output_music_dir, 'artists')) print 'moving compilations' shutil.move(os.path.join(music_dir, 'compilations'), os.path.join(output_music_dir, 'compilations')) print 'moving corrupt' shutil.move(os.path.join(music_dir, 'corrupt'), os.path.join(output_music_dir, 'corrupt')) print 'moving other' shutil.move(os.path.join(music_dir, 'other'), os.path.join(output_music_dir, 'other')) if copy_others: for other_path in other_paths: extension = utils.extension(other_path) (basename, filename) = os.path.split(other_path) music_file_output = os.path.join(output_music_dir, 'other', extension, basename[len(music_dir) + 1:], filename) music_file_output = music_file_output.replace( '/other/%s/other/%s' % (extension, extension), '/other/%s' % (extension, )) utils.copy(other_path, music_file_output) print 'output: %s' % (music_file_output, ) corrupt_music_paths = [] missing_tags_paths = [] guessed_missing_tags_paths = []
# Fragment of a dispatch ("multimethod") class body — the `self.table = {}`
# at the start is the tail of an __init__ whose header is outside this
# view, so the code is left untouched.
# `add` registers a handler for a dispatch value; `default` is a property
# returning a setter for the fallback handler; `__call__` routes on
# dispatch_fn's result, falling back to default_fn or raising ValueError.
# NOTE(review): when `add` is used as a decorator the inner adder returns
# None, rebinding the decorated name to None — confirm intended usage.
# The module-level tail builds a `published_date` multimethod keyed on the
# lower-cased file extension, with a default of None.
self.table = {} def add(self, dispatch_val, fn=None): def adder(fn): self.table[dispatch_val] = fn if fn: adder(fn) else: return adder @property def default(self): def adder(fn): self.default_fn = fn return adder def __call__(self, *args): val = self.dispatch_fn(*args) if val in self.table: return self.table[val](*args) elif self.default_fn: return self.default_fn(*args) else: raise ValueError("Unknown dispatch value:", val) published_date = multimethod(lambda path: extension(path).lower()) published_date.default(lambda _: None)