Example #1
def work():
    ''' Fetch and process a job by running the McXtrace simulation '''
    # fetch job
    run = one_or_none(fetch(SimRun, status="waiting").order_by('created'))
    if run is None:
        return

    run.status = "running"
    run.save()

    print "Running job: ", run.ref

    # create output folder; works as a lock
    workdir = "%s/%%s" % (WORK_PATH % run.ref)
    try:
        os.mkdir(workdir % "")
    except OSError:
        # Someone else beat us to it, bail out
        print "Skipping: already running."
        return

    # try to process job
    try:
        processJob(run, workdir)
    except:
        exc = traceback.format_exc()
        file(workdir % 'err.txt', 'a').write('\n' + STACKTRACE % exc)
        print exc

    # mark job as completed
    run.status = "done"
    run.save()
    print "Done."
Example #3
def work():
    """ Fetch and process a job by running the McStas simulation """
    # fetch job
    run = one_or_none(fetch(SimRun, status="waiting").order_by("created"))
    if run is None:
        return

    run.status = "running"
    run.save()

    print "Running job: ", run.ref

    # create output folder; works as a lock
    workdir = "%s/%%s" % (WORK_PATH % run.ref)
    try:
        os.mkdir(workdir % "")
    except OSError:
        # Someone else beat us to it, bail out
        print "Skipping: already running."
        return

    # try to process job
    try:
        processJob(run, workdir)
    except:
        exc = traceback.format_exc()
        file(workdir % "err.txt", "a").write("\n" + STACKTRACE % exc)
        print exc

    # mark job as completed
    run.status = "done"
    with transaction.commit_on_success():
        run.save()
    print "Done."
Example #4
def lambda_handler(event, context=None):
    platforms = fetch('CURRENT_TARGETS', event['version']).strip().split('\n')

    if event['buildInput']['platforms']:
        platforms = [
            p for p in platforms if p in event['buildInput']['platforms']
        ]

    return platforms
Example #5
def download_file(reqs_ses, url, filepath):
    res = common.fetch(
        requests_session=reqs_ses,
        url=url,
        method='get',
        expect_status=200,
    )
    common.write_file(  # Store page to disk
        file_path=filepath, data=res.content)
    logging.debug('Saved {0} to {1}'.format(url, filepath))
    return
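Most of the Tumblr and imageboard examples on this page go through a project-local common module whose API is only visible from its call sites: common.fetch(requests_session=..., url=..., method=..., expect_status=...), common.write_file(file_path=..., data=...), and a common.FetchGot404 exception in Example #13. A minimal, hypothetical sketch consistent with those calls, not the project's actual code:

import logging
import os
import requests


class FetchGot404(Exception):
    # Assumed to be raised when the server answers HTTP 404.
    pass


def fetch(requests_session, url, method='get', data=None, expect_status=None):
    # Issue an HTTP request through an existing requests.Session and return the response.
    response = requests_session.request(method=method, url=url, data=data)
    if response.status_code == 404:
        raise FetchGot404(url)
    if expect_status is not None and response.status_code != expect_status:
        raise ValueError('Unexpected status {0} for {1}'.format(response.status_code, url))
    return response


def write_file(file_path, data):
    # Write raw bytes to disk, creating parent directories as needed.
    directory = os.path.dirname(file_path)
    if directory and not os.path.exists(directory):
        os.makedirs(directory)
    with open(file_path, 'wb') as f:
        f.write(data)
    logging.debug('Wrote {0} bytes to {1}'.format(len(data), file_path))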
Example #6
def load_archive(req_ses, blog_name):
    base_url = 'http://{0}.tumblr.com/archive'.format(blog_name)
    all_history = ''
    # Load first block
    first_response = common.fetch(
        requests_session=req_ses,
        url=base_url,
        method='get',
    )
    common.write_file(file_path=os.path.join('debug', 'first_response.html'),
                      data=first_response.content)
    all_history += first_response.content
    # Find last post date
    last_post_date = find_last_post_date(html=first_response.content)
    # Load subsequent history
    while True:
        # Load next block
        scroll_url = 'http://{0}.tumblr.com/archive?before_time={1}'.format(
            blog_name, last_post_date)
        scroll_response = common.fetch(
            requests_session=req_ses,
            url=scroll_url,
            method='get',
        )
        common.write_file(file_path=os.path.join('debug',
                                                 'scroll_response.html'),
                          data=scroll_response.content)
        all_history += scroll_response.content
        # Find last post date
        last_post_date = find_last_post_date(html=scroll_response.content)
        # Stop if no more posts
        if not last_post_date:
            break

    # Store combined page
    common.write_file(file_path=os.path.join('debug', 'all_history.html'),
                      data=all_history)
    return all_history
Example #7
def save_followed_file(req_ses, followed_list_path):
    logging.info('Saving list of followed blogs')
    followed_list_path = os.path.join('dl', 'followed.opml')
    # Get followed list file
    # www.tumblr.com/following.opml
    logging.debug('Loading followed list')
    followed_res = common.fetch(
        requests_session=req_ses,
        url='https://www.tumblr.com/following.opml',
        method='get',
    )
    common.write_file(  # Save to file
        file_path=followed_list_path,
        data=followed_res.content)
    logging.info('Saved list of followed blogs')
    return
Example #8
def find_blog_name_thorough(req_ses, blog_url):  # TODO
    logging.debug(
        'Using slower, more thorough name-finding on {0!r}'.format(blog_url))
    # Extract domain
    # 'http://nsfw.kevinsano.com'
    # 'nsfw.kevinsano.com'
    domain_search = re.search(r'(?:https?://)?([^\\/]+\.\w+)/?', blog_url)
    if domain_search:
        domain = domain_search.group(1)
        logging.debug('domain={0!r}'.format(domain))
    else:
        logging.error('Could not identify domain! Failing.')
        return None
    # Generate archive page URL
    blog_rss_url = 'http://{0}/rss'.format(domain)
    logging.debug('blog_rss_url={0!r}'.format(blog_rss_url))
    rss_path = os.path.join('debug',
                            'run_grab_site.find_blog_name_thorough.rss.rss')
    # Load archive page
    rss_res = common.fetch(
        requests_session=req_ses,
        url=blog_rss_url,
        method='get',
    )
    common.write_file(  # Save to file for debugging
        file_path=rss_path,
        data=rss_res.content)
    # Extract blog name from page
    # '<generator>Tumblr (3.0; @nsfwkevinsano)</generator>'
    # 'nsfwkevinsano'
    name_search = re.search(r'<generator>[^<]{0,25}@([^)<]+)\)</generator>',
                            rss_res.content)
    if name_search:
        blog_name = name_search.group(1)
    else:
        logging.error('Could not extract blog name from RSS! Failing.')
        return None
    logging.debug('blog_name={0!r}'.format(blog_name))
    return blog_name
Example #9
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--debug', action='store_true')
  parser.add_argument('--arch', default=common.arch)
  parser.add_argument('--skia-dir')
  parser.add_argument('--skia-release', default='m93-87e8842e8c')
  parser.add_argument('--skija-version')
  (args, _) = parser.parse_known_args()

  # Fetch Skia
  build_type = 'Debug' if args.debug else 'Release'
  if args.skia_dir:
    skia_dir = os.path.abspath(args.skia_dir)
    os.chdir(common.root + '/platform')
  else:
    os.chdir(common.root + '/platform')
    skia_dir = "Skia-" + args.skia_release + "-" + common.system + "-" + build_type + '-' + common.arch
    if not os.path.exists(skia_dir):
      zip = skia_dir + '.zip'
      common.fetch('https://github.com/JetBrains/skia-build/releases/download/' + args.skia_release + '/' + zip, zip)
      with zipfile.ZipFile(zip, 'r') as f:
        print("Extracting", zip)
        f.extractall(skia_dir)
      os.remove(zip)
    skia_dir = os.path.abspath(skia_dir)
  print("Using Skia from", skia_dir)

  # CMake
  os.makedirs("build", exist_ok = True)
  common.check_call([
    "cmake",
    "-G", "Ninja",
    "-DCMAKE_BUILD_TYPE=" + build_type,
    "-DSKIA_DIR=" + skia_dir,
    "-DSKIA_ARCH=" + common.arch]
    + (["-DCMAKE_OSX_ARCHITECTURES=" + {"x64": "x86_64", "arm64": "arm64"}[common.arch]] if common.system == "macos" else [])
    + [".."],
    cwd=os.path.abspath('build'))

  # Ninja
  common.check_call(["ninja"], cwd=os.path.abspath('build'))

  # Codesign
  if common.system == "macos" and os.getenv("APPLE_CODESIGN_IDENTITY"):
    subprocess.call(["codesign",
                     # "--force",
                     # "-vvvvvv",
                     "--deep",
                     "--sign",
                     os.getenv("APPLE_CODESIGN_IDENTITY"),
                     "build/libskija_" + common.arch + ".dylib"])

  # javac
  modulepath = []
  if args.skija_version:
    modulepath += [
      common.fetch_maven('org.jetbrains.skija', 'skija-shared', args.skija_version, repo='https://packages.jetbrains.team/maven/p/skija/maven')
    ]
  else:
    build_shared.main()
    modulepath += ['../shared/target/classes']

  os.chdir(common.root + '/platform')
  sources = common.glob('java-' + common.classifier, '*.java')
  common.javac(sources, 'target/classes', modulepath = modulepath, release = '9')

  # Copy files
  target = 'target/classes/org/jetbrains/skija'
  if common.classifier == 'macos-x64':
    common.copy_newer('build/libskija_x64.dylib', target + '/macos/x64/libskija_x64.dylib')
  elif common.classifier == 'macos-arm64':
    common.copy_newer('build/libskija_arm64.dylib', target + '/macos/arm64/libskija_arm64.dylib')
  elif common.classifier == 'linux':
    common.copy_newer('build/libskija.so', target + '/linux/libskija.so')
  elif common.classifier == 'windows':
    common.copy_newer('build/skija.dll', target + '/windows/skija.dll')
    common.copy_newer(skia_dir + '/out/' + build_type + '-' + common.arch + '/icudtl.dat',
                      target + '/windows/icudtl.dat')

  return 0
Example #10
#!/usr/bin/env python

import time
from common import fetch, pluralize, logger, db

period_sleep = 1

# polling loop
while (True):
    time.sleep(period_sleep)

    # fetch last known ledger index
    last_seq = int(db.ledgers.find_one(sort=[('seq', -1)])['seq'])

    # get latest closed ledger
    response = fetch({"method": "ledger_closed", "params": [{}]})
    seq = response['ledger_index']
    logger.info('FETCHED ledger {}'.format(seq))

    # how many did we miss?
    delta = seq - last_seq
    if (delta == 0):
        logger.info('local database is up to date')
        continue
    else:
        logger.info('LAGGING by {} ledger{}'.format(delta, pluralize(delta)))

    # parse missing ledgers
    for idx in range(last_seq + 1, seq + 1):

        # ledgerEntryType
Example #11
def tumblr_login(req_ses, email, username, password):
    logging.info('Logging in as {0!r}'.format(username))

    # Load front page to look normal
    logging.debug('Loading front page to prepare login attempt')
    response_1 = common.fetch(
        requests_session=req_ses,
        url='https://www.tumblr.com/login',
        method='get',
    )
    common.write_file(
        file_path=os.path.join('debug', 'login.response_1.html'),
        data=response_1.content
    )

    # Load login page
    logging.debug('Loading login page')
    response_2 = common.fetch(
        requests_session=req_ses,
        url='https://www.tumblr.com/login',
        method='get',
    )
    common.write_file(
        file_path=os.path.join('debug', 'login.response_2.html'),
        data=response_2.content
    )

    # Get key from login page
    #'<meta name="tumblr-form-key" id="tumblr_form_key" content="!1231544361914|zzONO1XougbCpvRupb561N630">'
    token_search = re.search('<meta name="tumblr-form-key" id="tumblr_form_key" content="([a-zA-Z0-9!|]+)">', response_2.content, re.IGNORECASE)
    token = token_search.group(1)

    # Perform login
    logging.debug('Sending login request')
    response_3 = common.fetch(
        requests_session=req_ses,
        url='https://www.tumblr.com/login',
        method='post',
        data={
            'determine_email': email,
            'user[email]': email,
            'user[password]': password,
            'form_key': token,
        },
        expect_status=200,
    )
    common.write_file(
        file_path=os.path.join('debug', 'login.response_3.html'),
        data=response_3.content
    )

    # Validate login worked
    logging.debug('Checking if login worked')
    response_4 = common.fetch(
        requests_session=req_ses,
        url='https://www.tumblr.com/dashboard',
        method='get',
    )
    common.write_file(
        file_path=os.path.join('debug', 'login.response_4.html'),
        data=response_4.content
    )
    # TODO
    logging.warning('Login validation still TODO')

    logging.info('Logged in as {0!r}'.format(username))
    return
Example #12
def fetch_test_data(name):
    common.fetch(URL_PREFIX + name,
                 os.path.join(IDNA_TEST_DATA_DIR, os.path.basename(name)))
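Examples #9 and #12 use a different, two-argument form of common.fetch(url, path) that downloads a URL straight to a local file. That helper is not shown here either; a minimal sketch consistent with those call sites (purely illustrative, the real helper may add caching, retries, or progress output):

import shutil
import urllib.request


def fetch(url, file_path):
    # Hypothetical sketch: download `url` and write the response body to `file_path`.
    print('Fetching', url)
    with urllib.request.urlopen(url) as response, open(file_path, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)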
Example #13
def save_post_media(Thread, Image, db_ses, req_ses, media_base_path, board_name, post):
    """Save media from a post, adding it to the DB.
    Expects py8chan type Post object.
    Commits changes."""
    if (not config.media_enable_saving):
        # If media saving disabled completely
        return
    # Save any new media (from new posts).
    for image in post.all_files():# For each image, if any:
        # Lookup image hash in DB.
        image_find_query = db_ses.query(Image)\
            .filter(Image.hash_md5 == image.file_md5_hex)
        existing_image_row = image_find_query.first()
        if existing_image_row:
            # Check if image needs (re)downloading
            do_redownload = decide_if_media_redownload(
                media_base_path=media_base_path,
                board_name=board_name,
                image_row=existing_image_row
            )
            if (not do_redownload):
                return None# Be f*****g explicit about what we're doing.
        # If image not in DB, download image then add it to DB.
        if (config.media_download_enable_full):# If disabled fullsized media files will not be downloaded.
            # Download image:
            # Generate path to save image to
            image_filename = image.filename
            if (image_filename == 'deleted'):
                logging.info('File was deleted and cannot be downloaded: {0!r}'.format(image_filename))
            assert2( (type(image_filename) is str), value=image_filename)# Should be text. (Sanity check remote value)
            assert2( (8 <= len(image_filename) <= 128), value=image_filename)# is image hash so about 64 chars. (Sanity check remote value)
            board_path = os.path.join(media_base_path, board_name)
            image_filepath = generate_media_filepath(
                media_base_path=media_base_path,
                media_type='image',# TODO: Validate that 'image' is what we're using for this value
                filename=image.filename
            )
            file_extension = image.file_extension
            assert2( (type(file_extension) is str), value=file_extension)# Should be text. (Sanity check remote value)
            assert2( (0 <= len(file_extension) <= 16), value=file_extension)# Short text. (Sanity check remote value)
            image_url = image.file_url
            assert2( (type(image_url) is str), image_url)# Should be text. (Sanity check remote value)
            assert2( (16 <= len(image_url) <= 256), image_url)# Short text. (Sanity check remote value)
            # Load image from server
            try:
                logging.debug('save_post_media() image_url={0!r}; image_filepath={1!r};'.format(image_url, image_filepath))
                image_resp = common.fetch(
                    requests_session=req_ses,
                    url = image_url,
                )
            except common.FetchGot404 as err:
##                logging.exception(err)
                logging.warning('Could not fetch primary remote file, skipping this image.')
                continue# Skip handling of this image download.
            # Save image file
            common.write_file(
                file_path=image_filepath,
                data=image_resp.content
            )
            # Calculate hashes. (We use more than one because hash collisions are a thing.)
            with open(image_filepath, 'rb') as image_f:
                # https://www.pythoncentral.io/hashing-files-with-python/
                # Filesize in bytes
                size_bytes = os.path.getsize(image_filepath)# https://stackoverflow.com/questions/2104080/how-to-check-file-size-in-python
                hash_md5 = common.hash_file_md5(filepath=image_filepath)
                hash_sha1 = common.hash_file_sha1(filepath=image_filepath)
                hash_sha256 = common.hash_file_sha256(filepath=image_filepath)
                hash_sha512 = common.hash_file_sha512(filepath=image_filepath)

            print('BREAKPOINT before md5 check')
            # Sanity-check the received file's MD5 against what the server told us. (Do we really need to do this? Does having this check make us less reliable?)
            md5_algorithm_works = (hash_md5 == image.file_md5)
            if (not md5_algorithm_works):
                logging.warning('MD5 IMPLEMENTATION IN USE IS NOT CONSISTENT WITH EXPECTED DATA! DO NOT USE IN PRODUCTION!')# TODO: Crash on this happening once everything else is working.
                logging.debug('hash_md5={0!r}'.format(hash_md5))
                logging.debug('image.file_md5={0!r}'.format(image.file_md5))
                logging.debug('image.file_md5_hex={0!r}'.format(image.file_md5_hex))
                print('BREAKPOINT in md5 check failed')# F**K WHY IS ENCODING SUCH A PAIN IN THE ASS?!
            else:
                logging.warning('MD5 hashes matched this time')
                print('BREAKPOINT md5 check passed')
            print('BREAKPOINT after md5 check')
        else:
            # If image download disabled:
            # NULL values for unfetched data
            image_filename = None
            file_extension = None
            size_bytes = None
            hash_md5 = None
            hash_sha1 = None
            hash_sha256 = None
            hash_sha512 = None

        if (config.media_download_enable_thumb):# If disabled thumbnails will not be downloaded.
            # Download thumbnail:
            thumbnail_url = image.thumbnail_url
            # Generate thumbnail path
            filename_thumbnail = os.path.basename(thumbnail_url)  # TODO: Less hacky handling of this. (Feels wrong to use filesystem code for a URL)
            thumbnail_filepath = generate_media_filepath(
                media_base_path=media_base_path,
                media_type='thumb',
                filename=filename_thumbnail
            )
            try:
                # Load thumbnail from server
                thumbnail_resp = common.fetch(
                    requests_session=req_ses,
                    url = thumbnail_url,
                )
            except common.FetchGot404 as err:
##                logging.exception(err)
                logging.warning('Could not fetch thumbnail remote file, skipping this image.')# TODO: Decide if missing thumbnail should or should not be enough to skip image add.
                continue# Skip handling of this image download.
            # Save thumbnail file
            common.write_file(
                file_path=thumbnail_filepath,
                data=thumbnail_resp.content
            )
        else:
            # If no thumbnail download:
            # NULL values for unfetched data
            filename_thumbnail = None
        # Create DB record for image
        new_image_row = Image(
            # Identification of image characteristics
            size_bytes = size_bytes, # Size of the fullsized image in bytes.
            hash_md5 = hash_md5, # MD5 hash of full file.
            hash_sha1 = hash_sha1, # SHA1 hash of full file.
            hash_sha256 = hash_sha256, # SHA256 hash of full file.
            hash_sha512 = hash_sha512, # SHA512 hash of full file.
            # Files on disk
            file_extension = file_extension, # File extension of the fullview file.
            filename_full = image_filename, # Fullsized media file's filename.
            filename_thumbnail = filename_thumbnail, # Thumbnail's filename. Does not care if OP or reply.
        )
        # Stage new image entry to DB.
        db_ses.add(new_image_row)
        # Commit new image entry.
        db_ses.commit()
        logging.info('Added image to DB: {0!r}'.format(image_filepath))
        time.sleep(config.media_download_delay)# Ratelimiting
        continue# Done saving this image.
    return None# Can we return a list of DB IDs for the media?