def fetch_peps():
    """Download the PEP zip archive and load every PEP into the database.

    The archive at ``zip_url`` is saved to a temporary file and extracted
    into a temporary directory. Every ``(number, filename)`` pair produced by
    ``pep_numbers`` is run through ``pep_file_to_metadata`` and the matching
    ``Pep`` row is created, or refreshed if it already exists. All rows are
    added to the session and committed once at the end.

    The temporary file and directory are removed before returning, whether
    or not the import succeeded.
    """
    # Function-scope imports: only the cleanup code below needs them.
    import os
    import shutil

    tmp_file = NamedTemporaryFile(delete=False)
    tmp_dir = mkdtemp()

    try:
        # Get the remote file and save it to disk. Loading the zip file
        # straight from memory caused issues, hence the temporary file.
        response = get(zip_url)
        tmp_file.write(response.content)
        tmp_file.flush()

        # We trust this zip file, otherwise we shouldn't use extractall.
        archive = ZipFile(tmp_file)
        try:
            archive.extractall(tmp_dir)
        finally:
            archive.close()

        # Lazily build (number, path, raw, contents, properties) tuples.
        results = (
            (number,) + pep_file_to_metadata(filename)
            for number, filename in pep_numbers(tmp_dir)
        )

        for number, path, raw, contents, properties in results:
            # Only the last path component is stored.
            filename = path.rsplit("/", 1)[-1]

            pep, created = get_or_create(
                Pep,
                commit=False,
                number=number,
                defaults={
                    'properties': properties,
                    'filename': filename,
                    'content': contents,
                    'raw_content': raw,
                },
            )

            if not created:
                # Refresh every stored field so raw_content and filename
                # cannot drift out of sync with content and properties.
                pep.properties = properties
                pep.filename = filename
                pep.content = contents
                pep.raw_content = raw

            db.session.add(pep)

        # get_or_create was called with commit=False, so commit once here.
        db.session.commit()
    finally:
        # delete=False means the temporary file is never removed
        # automatically, so clean up both it and the extraction directory.
        tmp_file.close()
        try:
            os.unlink(tmp_file.name)
        except OSError:
            # Best-effort cleanup; never mask an error from the import.
            pass
        shutil.rmtree(tmp_dir, ignore_errors=True)
def fetch_peps():
    """Fetch the PEP sources and load every PEP into the database.

    Two retrieval methods are used:

    * When both ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` are set
      in the environment, a remote instance is started, the PEPs Mercurial
      repository is cloned there with keywords expanded, zipped, and the
      archive is downloaded into a temporary file.
    * Otherwise a warning is printed and the archive at ``zip_url`` is
      downloaded over HTTP instead.

    The archive is extracted into a temporary directory. Every
    ``(number, filename)`` pair produced by ``pep_numbers`` is run through
    ``pep_file_to_metadata`` and the matching ``Pep`` row is created, or
    refreshed if it already exists. ``Pep.updated`` is set when the PEP has a
    parseable ``last-modified`` property. All rows are committed once at the
    end, and the temporary file and directory are removed before returning.

    Raises:
        KeyError: if a PEP's properties contain no ``title`` entry.
    """
    # Function-scope imports: only the cleanup code below needs them.
    import os
    import shutil

    # Accepted formats for the 'last-modified' property, tried in order.
    patterns = ("%Y-%m-%d %H:%M:%S", "%Y/%m/%d %H:%M:%S")

    tmp_file = NamedTemporaryFile(delete=False)
    tmp_dir = mkdtemp()

    try:
        # Get the remote file and save it to disk. Loading the zip file
        # straight from memory caused issues, hence the temporary file.
        #
        # The credentials are tested up front rather than by catching
        # KeyError around the whole remote session: an unrelated KeyError
        # raised mid-session must not silently trigger the HTTP fallback on
        # top of a partially written temporary file.
        if 'AWS_ACCESS_KEY_ID' in environ and 'AWS_SECRET_ACCESS_KEY' in environ:
            with OldSmallLucidInstance(terminate=True):
                sudo('apt-get -y -q install mercurial zip')
                run('hg clone http://hg.python.org/peps ~/peps/')
                put(join(dirname(abspath(__file__)), 'hg_config'), '~/.hgrc')
                with cd('~/peps/'):
                    # Mercurial is annoying here: half of the time a clean
                    # checkout reports file changes although a diff shows
                    # nothing - probably a file permission thing. Whatever
                    # the cause, doing a commit fixes it.
                    run('hg commit -m "Hackety Hackety Hack!"')
                    run('hg update --clean')
                    run('hg kwexpand')
                run('zip -q -r ~/peps.zip ./peps/')
                get('~/peps.zip', tmp_file)
            pep_base = join(tmp_dir, 'peps')
        else:
            print('*' * 80)
            print("AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environ vars need to be set.")
            print("DEFAULTING TO THE non-mercurial pull method (Revisions and dates will be missing)")
            print('*' * 80)
            response = http_get(zip_url)
            tmp_file.write(response.content)
            # The extracted top-level directory name is not fixed, so a
            # wildcard is handed to pep_numbers.
            # NOTE(review): presumably pep_numbers globs this - confirm.
            pep_base = join(tmp_dir, 'peps-*')

        tmp_file.flush()

        # We trust this zip file, otherwise we shouldn't use extractall.
        archive = ZipFile(tmp_file)
        try:
            archive.extractall(tmp_dir)
        finally:
            archive.close()

        # Lazily build (number, path, raw, contents, properties) tuples.
        results = (
            (number,) + pep_file_to_metadata(filename)
            for number, filename in pep_numbers(pep_base)
        )

        for number, path, raw, contents, properties in results:
            print(number)

            # Point links to other PEPs at this site instead of python.org.
            contents = contents.replace(
                "http://www.python.org/dev/peps/pep-", "http://www.peps.io/")
            title = properties.pop('title')

            # Reset for every PEP. Otherwise a PEP without 'last-modified'
            # would hit an undefined name on the first iteration, or inherit
            # the previous PEP's timestamp on later ones.
            dt = None
            last_modified = properties.get('last-modified')
            if last_modified:
                for pattern in patterns:
                    try:
                        dt = datetime.strptime(last_modified, pattern)
                    except ValueError:
                        continue
                    break

            # Only the last path component is stored.
            filename = path.rsplit("/", 1)[-1]

            # NOTE(review): title is part of the lookup, so a retitled PEP
            # may not match its existing row - confirm against get_or_create.
            pep, created = get_or_create(
                Pep,
                commit=False,
                number=number,
                title=title,
                defaults={
                    'properties': properties,
                    'filename': filename,
                    'content': contents,
                    'raw_content': raw,
                },
            )

            if not created:
                pep.properties = properties
                pep.filename = filename
                pep.content = contents
                pep.raw_content = raw

            if dt:
                pep.updated = dt

            db.session.add(pep)

        # get_or_create was called with commit=False, so commit once here.
        db.session.commit()
    finally:
        # delete=False means the temporary file is never removed
        # automatically, so clean up both it and the extraction directory.
        tmp_file.close()
        try:
            os.unlink(tmp_file.name)
        except OSError:
            # Best-effort cleanup; never mask an error from the import.
            pass
        shutil.rmtree(tmp_dir, ignore_errors=True)