def work():
    ''' Fetch and process a job by running the McXtrace simulation '''
    # fetch job
    run = one_or_none(fetch(SimRun, status="waiting").order_by('created'))
    if run is None:
        return

    run.status = "running"
    run.save()
    print "Running job: ", run.ref

    # create output folder; works as a lock
    workdir = "%s/%%s" % (WORK_PATH % run.ref)
    try:
        os.mkdir(workdir % "")
    except OSError:
        # Someone else beat us to it, bail out
        print "Skipping: already running."
        return

    # try to process job
    try:
        processJob(run, workdir)
    except:
        exc = traceback.format_exc()
        file(workdir % 'err.txt', 'a').write('\n' + STACKTRACE % exc)
        print exc

    # mark job as completed
    run.status = "done"
    run.save()
    print "Done."
def work():
    """ Fetch and process a job by running the McStas simulation """
    # fetch job
    run = one_or_none(fetch(SimRun, status="waiting").order_by("created"))
    if run is None:
        return

    run.status = "running"
    run.save()
    print "Running job: ", run.ref

    # create output folder; works as a lock
    workdir = "%s/%%s" % (WORK_PATH % run.ref)
    try:
        os.mkdir(workdir % "")
    except OSError:
        # Someone else beat us to it, bail out
        print "Skipping: already running."
        return

    # try to process job
    try:
        processJob(run, workdir)
    except:
        exc = traceback.format_exc()
        file(workdir % "err.txt", "a").write("\n" + STACKTRACE % exc)
        print exc

    # mark job as completed
    run.status = "done"
    with transaction.commit_on_success():
        run.save()
    print "Done."
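# Both work() variants above assume a one_or_none() helper that collapses a
# queryset to a single object or None; it is not shown in this section. A
# minimal sketch of the assumed behaviour (the name and logic are assumptions,
# not the project's actual implementation):
def one_or_none(queryset):
    # Take at most one row and return it, or None when the queryset is empty.
    results = list(queryset[:1])
    return results[0] if results else None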
def lambda_handler(event, context=None):
    platforms = fetch('CURRENT_TARGETS', event['version']).strip().split('\n')
    if event['buildInput']['platforms']:
        platforms = [p for p in platforms if p in event['buildInput']['platforms']]
    return platforms
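# Hypothetical invocation of lambda_handler() above, only to show the expected
# event shape; the version string and platform names are made-up placeholders.
#   example_event = {
#       'version': '1.2.3',
#       'buildInput': {'platforms': ['linux-x64', 'macos-arm64']},
#   }
#   lambda_handler(example_event)
# would return the CURRENT_TARGETS entries for that version, filtered down to
# the requested platforms.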
def download_file(reqs_ses, url, filepath):
    res = common.fetch(
        requests_session=reqs_ses,
        url=url,
        method='get',
        expect_status=200,
    )
    common.write_file(  # Store page to disk
        file_path=filepath,
        data=res.content)
    logging.debug('Saved {0} to {1}'.format(url, filepath))
    return
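# download_file() above, and the functions that follow, all go through
# common.fetch() and common.write_file(), project helpers that are not shown in
# this section. A rough sketch of what the fetch wrapper is assumed to do
# (shared-session request plus an optional status check); the name and body are
# illustrative only, not the real implementation:
def fetch_sketch(requests_session, url, method='get', data=None, expect_status=None):
    # Issue the request through the shared session so cookies persist between calls.
    response = requests_session.request(method, url, data=data)
    # Fail loudly if the caller expected a specific status code.
    if expect_status is not None and response.status_code != expect_status:
        raise ValueError('Unexpected status {0} for {1}'.format(response.status_code, url))
    return response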
def load_archive(req_ses, blog_name):
    base_url = 'http://{0}.tumblr.com/archive'.format(blog_name)
    all_history = ''
    # Load first block
    first_response = common.fetch(
        requests_session=req_ses,
        url=base_url,
        method='get',
    )
    common.write_file(file_path=os.path.join('debug', 'first_response.html'),
        data=first_response.content)
    all_history += first_response.content
    # Find last post date
    last_post_date = find_last_post_date(html=first_response.content)
    # Load subsequent history
    while True:
        # Load next block
        scroll_url = 'http://{0}.tumblr.com/archive?before_time={1}'.format(
            blog_name, last_post_date)
        scroll_response = common.fetch(
            requests_session=req_ses,
            url=scroll_url,  # Use the paginated URL so each pass actually advances.
            method='get',
        )
        common.write_file(file_path=os.path.join('debug', 'scroll_response.html'),
            data=scroll_response.content)
        all_history += scroll_response.content
        # Find last post date
        last_post_date = find_last_post_date(html=scroll_response.content)
        # Stop if no more posts
        if not last_post_date:
            break
    # Store combined page
    common.write_file(file_path=os.path.join('debug', 'all_history.html'),
        data=all_history)
    return all_history
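# load_archive() relies on find_last_post_date(), which is not shown in this
# section. A minimal sketch under the assumption that the archive HTML embeds
# "?before_time=<unix timestamp>" links for the next page; the name and regex
# are illustrative guesses, not the project's actual code:
import re

def find_last_post_date_sketch(html):
    # Return the last before_time value found on the page, or None when absent.
    matches = re.findall(r'before_time=(\d+)', html)
    return matches[-1] if matches else None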
def save_followed_file(req_ses, followed_list_path):
    logging.info('Saving list of followed blogs')
    followed_list_path = os.path.join('dl', 'followed.opml')
    # Get followed list file
    # www.tumblr.com/following.opml
    logging.debug('Loading followed list')
    followed_res = common.fetch(
        requests_session=req_ses,
        url='https://www.tumblr.com/following.opml',
        method='get',
    )
    common.write_file(  # Save to file
        file_path=followed_list_path,
        data=followed_res.content)
    logging.info('Saved list of followed blogs')
    return
def find_blog_name_thorough(req_ses, blog_url):
    # TODO
    logging.debug(
        'Using slower, more thorough name-finding on {0!r}'.format(blog_url))
    # Extract domain
    # 'http://nsfw.kevinsano.com'
    # 'nsfw.kevinsano.com'
    domain_search = re.search(r'(?:https?://)?([^\\/]+\.\w+)/?', blog_url)
    if domain_search:
        domain = domain_search.group(1)
        logging.debug('domain={0!r}'.format(domain))
    else:
        logging.error('Could not identify domain! Failing.')
        return None
    # Generate RSS feed URL
    blog_rss_url = 'http://{0}/rss'.format(domain)
    logging.debug('blog_rss_url={0!r}'.format(blog_rss_url))
    rss_path = os.path.join('debug', 'run_grab_site.find_blog_name_thorough.rss.rss')
    # Load RSS feed
    rss_res = common.fetch(
        requests_session=req_ses,
        url=blog_rss_url,
        method='get',
    )
    common.write_file(  # Save to file for debugging
        file_path=rss_path,
        data=rss_res.content)
    # Extract blog name from page
    # '<generator>Tumblr (3.0; @nsfwkevinsano)</generator>'
    # 'nsfwkevinsano'
    name_search = re.search('<generator>[^<]{0,25}@([^)<]+)\)</generator>',
        rss_res.content)
    if name_search:
        blog_name = name_search.group(1)
        logging.debug('blog_name={0!r}'.format(blog_name))
        return blog_name
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--arch', default=common.arch)
    parser.add_argument('--skia-dir')
    parser.add_argument('--skia-release', default='m93-87e8842e8c')
    parser.add_argument('--skija-version')
    (args, _) = parser.parse_known_args()

    # Fetch Skia
    build_type = 'Debug' if args.debug else 'Release'
    if args.skia_dir:
        skia_dir = os.path.abspath(args.skia_dir)
        os.chdir(common.root + '/platform')
    else:
        os.chdir(common.root + '/platform')
        skia_dir = "Skia-" + args.skia_release + "-" + common.system + "-" + build_type + '-' + common.arch
        if not os.path.exists(skia_dir):
            zip = skia_dir + '.zip'
            common.fetch('https://github.com/JetBrains/skia-build/releases/download/' + args.skia_release + '/' + zip, zip)
            with zipfile.ZipFile(zip, 'r') as f:
                print("Extracting", zip)
                f.extractall(skia_dir)
            os.remove(zip)
        skia_dir = os.path.abspath(skia_dir)
    print("Using Skia from", skia_dir)

    # CMake
    os.makedirs("build", exist_ok = True)
    common.check_call([
        "cmake",
        "-G", "Ninja",
        "-DCMAKE_BUILD_TYPE=" + build_type,
        "-DSKIA_DIR=" + skia_dir,
        "-DSKIA_ARCH=" + common.arch]
        + (["-DCMAKE_OSX_ARCHITECTURES=" + {"x64": "x86_64", "arm64": "arm64"}[common.arch]] if common.system == "macos" else [])
        + [".."],
        cwd=os.path.abspath('build'))

    # Ninja
    common.check_call(["ninja"], cwd=os.path.abspath('build'))

    # Codesign
    if common.system == "macos" and os.getenv("APPLE_CODESIGN_IDENTITY"):
        subprocess.call(["codesign",
                         # "--force",
                         # "-vvvvvv",
                         "--deep",
                         "--sign", os.getenv("APPLE_CODESIGN_IDENTITY"),
                         "build/libskija_" + common.arch + ".dylib"])

    # javac
    modulepath = []
    if args.skija_version:
        modulepath += [
            common.fetch_maven('org.jetbrains.skija', 'skija-shared', args.skija_version,
                               repo='https://packages.jetbrains.team/maven/p/skija/maven')
        ]
    else:
        build_shared.main()
        modulepath += ['../shared/target/classes']
        os.chdir(common.root + '/platform')
    sources = common.glob('java-' + common.classifier, '*.java')
    common.javac(sources, 'target/classes', modulepath = modulepath, release = '9')

    # Copy files
    target = 'target/classes/org/jetbrains/skija'
    if common.classifier == 'macos-x64':
        common.copy_newer('build/libskija_x64.dylib', target + '/macos/x64/libskija_x64.dylib')
    elif common.classifier == 'macos-arm64':
        common.copy_newer('build/libskija_arm64.dylib', target + '/macos/arm64/libskija_arm64.dylib')
    elif common.classifier == 'linux':
        common.copy_newer('build/libskija.so', target + '/linux/libskija.so')
    elif common.classifier == 'windows':
        common.copy_newer('build/skija.dll', target + '/windows/skija.dll')
        common.copy_newer(skia_dir + '/out/' + build_type + '-' + common.arch + '/icudtl.dat', target + '/windows/icudtl.dat')

    return 0
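# Typical invocations of the build script above (flag names are taken from the
# argparse definitions; the script filename is an assumption):
#   python build.py                        # release build against the bundled Skia release
#   python build.py --debug                # debug build
#   python build.py --skia-dir ~/skia      # build against a local Skia checkout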
#!/usr/bin/env python
import time

from common import fetch, pluralize, logger, db

period_sleep = 1

# polling loop
while (True):
    time.sleep(period_sleep)

    # fetch last known ledger index
    last_seq = int(db.ledgers.find_one(sort=[('seq', -1)])['seq'])

    # get latest closed ledger
    response = fetch({"method": "ledger_closed", "params": [{}]})
    seq = response['ledger_index']
    logger.info('FETCHED ledger {}'.format(seq))

    # how many did we miss?
    delta = seq - last_seq
    if (delta == 0):
        logger.info('local database is up to date')
        continue
    else:
        logger.info('LAGGING by {} ledger{}'.format(delta, pluralize(delta)))

    # parse missing ledgers
    for idx in range(last_seq + 1, seq + 1):
        # ledgerEntryType
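# The body of the backfill loop is not included above. As a sketch only (the
# actual processing is not shown in the source), each missing ledger could be
# requested through the same JSON-RPC style fetch() helper, e.g.:
#   ledger = fetch({"method": "ledger",
#                   "params": [{"ledger_index": idx, "transactions": True}]})
# and its entries inspected by ledgerEntryType before being written to db.ledgers.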
def tumblr_login(req_ses, email, username, password):
    logging.info('Logging in as {0!r}'.format(username))
    # Load front page to look normal
    logging.debug('Loading front page to prepare login attempt')
    response_1 = common.fetch(
        requests_session=req_ses,
        url='https://www.tumblr.com/login',
        method='get',
    )
    common.write_file(
        file_path=os.path.join('debug', 'login.response_1.html'),
        data=response_1.content
    )
    # Load login page
    logging.debug('Loading login page')
    response_2 = common.fetch(
        requests_session=req_ses,
        url='https://www.tumblr.com/login',
        method='get',
    )
    common.write_file(
        file_path=os.path.join('debug', 'login.response_2.html'),
        data=response_2.content
    )
    # Get key from login page
    # '<meta name="tumblr-form-key" id="tumblr_form_key" content="!1231544361914|zzONO1XougbCpvRupb561N630">'
    token_search = re.search('<meta name="tumblr-form-key" id="tumblr_form_key" content="([a-zA-Z0-9!|]+)">',
        response_2.content, re.IGNORECASE)
    token = token_search.group(1)
    # Perform login
    logging.debug('Sending login request')
    response_3 = common.fetch(
        requests_session=req_ses,
        url='https://www.tumblr.com/login',
        method='post',
        data={
            'determine_email': email,
            'user[email]': email,
            'user[password]': password,
            'form_key': token,
        },
        expect_status=200,
    )
    common.write_file(
        file_path=os.path.join('debug', 'login.response_3.html'),
        data=response_3.content
    )
    # Validate login worked
    logging.debug('Checking if login worked')
    response_4 = common.fetch(
        requests_session=req_ses,
        url='https://www.tumblr.com/dashboard',
        method='get',
    )
    common.write_file(
        file_path=os.path.join('debug', 'login.response_4.html'),
        data=response_4.content
    )
    # TODO
    logging.warning('Login validation still TODO')
    logging.info('Logged in as {0!r}'.format(username))
    return
def fetch_test_data(name):
    common.fetch(URL_PREFIX + name,
                 os.path.join(IDNA_TEST_DATA_DIR, os.path.basename(name)))
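# Hypothetical call, assuming URL_PREFIX points at the Unicode IDNA data
# directory and IDNA_TEST_DATA_DIR is a local cache folder:
#   fetch_test_data('IdnaTestV2.txt')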
def save_post_media(Thread, Image, db_ses, req_ses, media_base_path, board_name, post):
    """Save media from a post, adding it to the DB. Expects py8chan type Post object. Commits changes."""
    if (not config.media_enable_saving):
        # If media saving disabled completely
        return
    # Save any new media (from new posts).
    for image in post.all_files():  # For each image, if any:
        # Lookup image hash in DB.
        image_find_query = db_ses.query(Image)\
            .filter(Image.hash_md5 == image.file_md5_hex)
        existing_image_row = image_find_query.first()
        if existing_image_row:
            # Check if image needs (re)downloading
            do_redownload = decide_if_media_redownload(
                media_base_path=media_base_path,
                board_name=board_name,
                image_row=existing_image_row
            )
            if (not do_redownload):
                return None  # Be explicit about what we're doing.
        # If image not in DB, download image then add it to DB.
        if (config.media_download_enable_full):  # If disabled, fullsized media files will not be downloaded.
            # Download image:
            # Generate path to save image to
            image_filename = image.filename
            if (image_filename == 'deleted'):
                logging.info('File was deleted and cannot be downloaded: {0!r}'.format(image_filename))
            assert2((type(image_filename) is str), value=image_filename)  # Should be text. (Sanity check remote value)
            assert2((8 <= len(image_filename) <= 128), value=image_filename)  # Is an image hash, so about 64 chars. (Sanity check remote value)
            board_path = os.path.join(media_base_path, board_name)
            image_filepath = generate_media_filepath(
                media_base_path=media_base_path,
                media_type='image',  # TODO: Validate that 'image' is what we're using for this value
                filename=image.filename
            )
            file_extension = image.file_extension
            assert2((type(file_extension) is str), value=file_extension)  # Should be text. (Sanity check remote value)
            assert2((0 <= len(file_extension) <= 16), value=file_extension)  # Short text. (Sanity check remote value)
            image_url = image.file_url
            assert2((type(image_url) is str), image_url)  # Should be text. (Sanity check remote value)
            assert2((16 <= len(image_url) <= 256), image_url)  # Short text. (Sanity check remote value)
            # Load image from server
            try:
                logging.debug('save_post_media() image_url={0!r}; image_filepath={1!r};'.format(image_url, image_filepath))
                image_resp = common.fetch(
                    requests_session=req_ses,
                    url=image_url,
                )
            except common.FetchGot404 as err:
                ## logging.exception(err)
                logging.warning('Could not fetch primary remote file, skipping this image.')
                continue  # Skip handling of this image download.
            # Save image file
            common.write_file(
                file_path=image_filepath,
                data=image_resp.content
            )
            # Calculate hashes. (We use more than one because hash collisions are a thing.)
            with open(image_filepath, 'rb') as image_f:
                # https://www.pythoncentral.io/hashing-files-with-python/
                # Filesize in bytes
                size_bytes = os.path.getsize(image_filepath)  # https://stackoverflow.com/questions/2104080/how-to-check-file-size-in-python
                hash_md5 = common.hash_file_md5(filepath=image_filepath)
                hash_sha1 = common.hash_file_sha1(filepath=image_filepath)
                hash_sha256 = common.hash_file_sha256(filepath=image_filepath)
                hash_sha512 = common.hash_file_sha512(filepath=image_filepath)
            print('BREAKPOINT before md5 check')
            # Sanity-check the received file's MD5 against what the server told us. (Do we really need to do this? Does having this check make us less reliable?)
            md5_algorithm_works = (hash_md5 == image.file_md5)
            if (not md5_algorithm_works):
                logging.warning('MD5 IMPLEMENTATION IN USE IS NOT CONSISTENT WITH EXPECTED DATA! DO NOT USE IN PRODUCTION!')  # TODO: Crash on this happening once everything else is working.
                logging.debug('hash_md5={0!r}'.format(hash_md5))
                logging.debug('image.file_md5={0!r}'.format(image.file_md5))
                logging.debug('image.file_md5_hex={0!r}'.format(image.file_md5_hex))
                print('BREAKPOINT in md5 check failed')
            else:
                logging.warning('MD5 hashes matched this time')
                print('BREAKPOINT md5 check passed')
            print('BREAKPOINT after md5 check')
        else:
            # If image download disabled:
            # NULL values for unfetched data
            image_filename = None
            image_filepath = None
            file_extension = None
            size_bytes = None
            hash_md5 = None
            hash_sha1 = None
            hash_sha256 = None
            hash_sha512 = None
        if (config.media_download_enable_thumb):  # If disabled, thumbnails will not be downloaded.
            # Download thumbnail:
            thumbnail_url = image.thumbnail_url
            # Generate thumbnail path
            filename_thumbnail = os.path.basename(thumbnail_url)  # TODO: Less hacky handling of this. (Feels wrong to use filesystem code for a URL)
            thumbnail_filepath = generate_media_filepath(
                media_base_path=media_base_path,
                media_type='thumb',
                filename=filename_thumbnail
            )
            try:
                # Load thumbnail from server
                thumbnail_resp = common.fetch(
                    requests_session=req_ses,
                    url=thumbnail_url,
                )
            except common.FetchGot404 as err:
                ## logging.exception(err)
                logging.warning('Could not fetch thumbnail remote file, skipping this image.')  # TODO: Decide if missing thumbnail should or should not be enough to skip image add.
                continue  # Skip handling of this image download.
            # Save thumbnail file
            common.write_file(
                file_path=thumbnail_filepath,
                data=thumbnail_resp.content
            )
        else:
            # If no thumbnail download:
            # NULL values for unfetched data
            filename_thumbnail = None
        # Create DB record for image
        new_image_row = Image(
            # Identification of image characteristics
            size_bytes=size_bytes,  # Size of the fullsized image in bytes.
            hash_md5=hash_md5,  # MD5 hash of full file.
            hash_sha1=hash_sha1,  # SHA1 hash of full file.
            hash_sha256=hash_sha256,  # SHA256 hash of full file.
            hash_sha512=hash_sha512,  # SHA512 hash of full file.
            # Files on disk
            file_extension=file_extension,  # File extension of the fullview file.
            filename_full=image_filename,  # Fullsized media file's filename.
            filename_thumbnail=filename_thumbnail,  # Thumbnail's filename. Does not care if OP or reply.
        )
        # Stage new image entry to DB.
        db_ses.add(new_image_row)
        # Commit new image entry.
        db_ses.commit()
        logging.info('Added image to DB: {0!r}'.format(image_filepath))
        time.sleep(config.media_download_delay)  # Ratelimiting
        continue  # Done saving this image.
    return None  # Can we return a list of DB IDs for the media?
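# save_post_media() leans on common.hash_file_md5() / _sha1() / _sha256() /
# _sha512(), which are not shown here. A minimal sketch of one of them,
# assuming they hash the file in chunks rather than reading it whole; the name
# and details are illustrative, not the project's actual helper:
import hashlib

def hash_file_md5_sketch(filepath):
    # Stream the file through hashlib in 64 KiB blocks and return the hex digest.
    digest = hashlib.md5()
    with open(filepath, 'rb') as f:
        for block in iter(lambda: f.read(65536), b''):
            digest.update(block)
    return digest.hexdigest()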