Example #1
0
 def test_find_executable_bad_version(self):
     # Request a deliberately bogus version string; the test expects the
     # lookup result to be falsy.
     located = find_executable(
         'pipeline runner',
         '123-notrealversion',
         ['./run-pipeline', '../run-pipeline'],
         version_arg='--version',
     )
     self.assertFalse(located)
Example #2
0
 def test_find_executable(self):
     # Look up the runner using the installed seesaw version as the
     # required version; the test expects a truthy result.
     located = find_executable(
         'pipeline runner',
         seesaw.__version__,
         ['./run-pipeline', '../run-pipeline'],
         version_arg='--version',
     )
     self.assertTrue(located)
Example #3
0
    def test_find_executable(self):
        # The runner script carries a "3" suffix when running on Python 3.
        candidates = (
            ['./run-pipeline3', '../run-pipeline3']
            if seesaw.six.PY3
            else ['./run-pipeline', '../run-pipeline']
        )

        # The installed seesaw version is the required version; the test
        # expects a truthy result.
        located = find_executable(
            'pipeline runner',
            seesaw.__version__,
            candidates,
            version_arg='--version',
        )
        self.assertTrue(located)
Example #4
0
    def test_find_executable_bad_version(self):
        # The runner script carries a "3" suffix when running on Python 3.
        candidates = (
            ['./run-pipeline3', '../run-pipeline3']
            if seesaw.six.PY3
            else ['./run-pipeline', '../run-pipeline']
        )

        # A deliberately bogus version string is requested; the test expects
        # the lookup result to be falsy.
        located = find_executable(
            'pipeline runner',
            '123-notrealversion',
            candidates,
            version_arg='--version',
        )
        self.assertFalse(located)
Example #5
0
    def test_find_executable_regex_version(self):
        # The runner script carries a "3" suffix when running on Python 3.
        if seesaw.six.PY3:
            exes = ['./run-pipeline3', '../run-pipeline3']
        else:
            exes = ['./run-pipeline', '../run-pipeline']

        # Pass the required version as a compiled pattern instead of a plain
        # string.  re.escape() neutralises every regex metacharacter in the
        # version text (not only the dots), so the pattern matches the
        # version literally even if it ever contains characters like "+".
        version_pattern = re.compile(re.escape(seesaw.__version__))

        self.assertTrue(find_executable(
            'pipeline runner',
            version_pattern,
            exes,
            version_arg='--version')
        )
Example #6
0
sys.path.append(os.getcwd())

from archivebot import control
from archivebot import shared_config
from archivebot.seesaw import extensions
from archivebot.seesaw import monitoring
from archivebot.seesaw.preflight import check_wpull_args
from archivebot.seesaw.wpull import WpullArgs
from archivebot.seesaw.tasks import GetItemFromQueue, StartHeartbeat, \
    SetFetchDepth, PreparePaths, WriteInfo, DownloadUrlFile, \
    RelabelIfAborted, MoveFiles, StopHeartbeat, MarkItemAsDone, CheckIP

VERSION = "20150715.01"
# PhantomJS release handed to the executable lookup below.
PHANTOMJS_VERSION = '1.9.8'
EXPIRE_TIME = 60 * 60 * 48  # 48 hours between archive requests

# External tools.  Wpull and youtube-dl are looked up with a version of
# None (presumably "any version" -- confirm against find_executable).
WPULL_EXE = find_executable('Wpull', None, ['./wpull'])
PHANTOMJS = find_executable(
    'PhantomJS',
    PHANTOMJS_VERSION,
    ['phantomjs', './phantomjs', '../phantomjs'],
    '-v',
)
YOUTUBE_DL = find_executable(
    'youtube-dl', None, ['./youtube-dl'], '--version',
)

# Fold (major, minor) into a single integer, e.g. 3.3 -> 33.
version_integer = sys.version_info.major * 10 + sys.version_info.minor

assert version_integer >= 33, (
    "This pipeline requires Python >= 3.3.  You are running %s."
    % sys.version
)

# Setting NO_SEGFAULT_340 to a non-empty value skips the 3.4.0 check.
if not os.environ.get('NO_SEGFAULT_340'):
    assert sys.version_info[:3] != (3, 4, 0), (
        "Python 3.4.0 should not be used. It may segfault. "
        "Set NO_SEGFAULT_340=1 if your Python is patched. "
        "See https://bugs.python.org/issue21435"
    )
Example #7
0
    raise Exception("This pipeline needs seesaw version 0.1.5 or higher.")


###########################################################################
# Find a useful Wget+Lua executable.
#
# WGET_LUA will be set to the first path that
# 1. does not crash with --version, and
# 2. prints the required version string
#
# Arguments, in order: a display name for the tool, the list of accepted
# version strings, and the candidate paths to probe.
WGET_LUA = find_executable(
    "Wget+Lua",
    # Only this exact build is accepted.
    ["GNU Wget 1.14.lua.20130523-9a5c"],
    # Candidate locations: relative paths first, then absolute ones.
    [
        "./wget-lua",
        "./wget-lua-warrior",
        "./wget-lua-local",
        "../wget-lua",
        "../../wget-lua",
        "/home/warrior/wget-lua",
        "/usr/bin/wget-lua"
    ]
)

# A falsy result means no candidate qualified; fail at module load time.
if not WGET_LUA:
    raise Exception("No usable Wget+Lua found.")

###########################################################################
# Determine if FFMPEG is available
# Should probably utilize an ffmpeg build (or source) distributed from the
# repo to avoid nasty API incompatibilities between FFMPEG versions.
# However, if the options used are relatively simple, using distro-provided
Example #8
0
    raise Exception('This pipeline needs seesaw version 0.8.5 or higher.')


###########################################################################
# Find a useful Wget+Lua executable.
#
# WGET_LUA will be set to the first path that
# 1. does not crash with --version, and
# 2. prints the required version string
#
# Arguments, in order: a display name for the tool, the list of accepted
# version strings, and the candidate paths to probe.
WGET_LUA = find_executable(
    'Wget+Lua',
    # Two builds are accepted: the 20130523 one and the 20160530 one.
    ['GNU Wget 1.14.lua.20130523-9a5c', 'GNU Wget 1.14.lua.20160530-955376b'],
    # Candidate locations: relative paths first, then absolute ones.
    [
        './wget-lua',
        './wget-lua-warrior',
        './wget-lua-local',
        '../wget-lua',
        '../../wget-lua',
        '/home/warrior/wget-lua',
        '/usr/bin/wget-lua'
    ]
)

# A falsy result means no candidate qualified; fail at module load time.
if not WGET_LUA:
    raise Exception('No usable Wget+Lua found.')


###########################################################################
# The version number of this pipeline definition.
#
# Update this each time you make a non-cosmetic change.
Example #9
0
import json

from os import environ as env
from urlparse import urlparse
from seesaw.project import *
from seesaw.item import *
from seesaw.task import *
from seesaw.pipeline import *
from seesaw.externalprocess import *

from seesaw.util import find_executable

VERSION = "20140119.01"
# The user agent string embeds the pipeline version.
USER_AGENT = "ArchiveTeam ArchiveBot/%s" % VERSION
EXPIRE_TIME = 60 * 60 * 48  # 48 hours between archive requests

# Only ./wget-lua is probed, and it must match the archivebot build string.
WGET_LUA = find_executable(
    'Wget+Lua',
    "GNU Wget 1.14.0-archivebot1",
    ['./wget-lua'],
)

if not WGET_LUA:
    raise Exception("No usable Wget+Lua found.")

# Required environment settings; the first missing one aborts loading.
if 'RSYNC_URL' not in env:
    raise Exception('RSYNC_URL not set.')
elif 'REDIS_URL' not in env:
    raise Exception('REDIS_URL not set.')
elif 'LOG_CHANNEL' not in env:
    raise Exception('LOG_CHANNEL not set.')

RSYNC_URL, REDIS_URL = env['RSYNC_URL'], env['REDIS_URL']
Example #10
0
      return path
  return None
#---------------------------------------

###########################################################################
# Locate a usable Wget+Lua executable.
#
# WGET_LUA becomes the first candidate path that
# 1. survives being run with --version, and
# 2. prints one of the accepted version strings.
WGET_LUA = find_executable(
    "Wget+Lua",
    [
        "GNU Wget 1.14.lua.20130120-8476",
        "GNU Wget 1.14.lua.20130407-1f1d",
        "GNU Wget 1.14.lua.20130427-92d2",
        "GNU Wget 1.14.lua.20130523-9a5c",
    ],
    [
        "./wget-lua",
        "./wget-lua-warrior",
        "./wget-lua-local",
        "../wget-lua",
        "../../wget-lua",
        "/home/warrior/wget-lua",
        "/usr/bin/wget-lua",
    ],
)

# Nothing qualified: stop loading the pipeline.
if not WGET_LUA:
  raise Exception("No usable Wget+Lua found.")

###########################################################################
# User agent for external requests.
#
# Pass this constant on the Wget command line.
USER_AGENT = (
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) "
    "AppleWebKit/533.20.25 (KHTML, like Gecko) "
    "Version/5.0.4 Safari/533.20.28"
)
Example #11
0
from seesaw.config import *
from seesaw.item import *
from seesaw.task import *
from seesaw.pipeline import *
from seesaw.externalprocess import *
from seesaw.tracker import *
from seesaw.util import find_executable


# Locate the Wget+Lua binary.  Arguments, in order: a display name for the
# tool, the single accepted version string, and the candidate paths to probe.
WGET_LUA = find_executable(
    "Wget+Lua",
    "GNU Wget 1.14.lua.20130120-8476",
    # Candidate locations: relative paths first, then absolute ones.
    [
        "./wget-lua",
        "./wget-lua-warrior",
        "./wget-lua-local",
        "../wget-lua",
        "../../wget-lua",
        "/home/warrior/wget-lua",
        "/usr/bin/wget-lua",
    ],
)

# A falsy result means no candidate qualified; fail at module load time.
if not WGET_LUA:
    raise Exception("No usable Wget+Lua found.")


USER_AGENT = (
	"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) "
	"AppleWebKit/533.20.25 (KHTML, like Gecko) "
	"Version/5.0.4 Safari/533.20.27"
)
VERSION = "20130129.01"

# Refuse to load under a seesaw release older than 0.1.5.
if StrictVersion(seesaw.__version__) < StrictVersion("0.1.5"):
	raise Exception("This pipeline needs seesaw version 0.1.5 or higher.")


###########################################################################
# Locate the rsync_size_tester helper script.
#
RSYNC_TEST = find_executable(
	"rsync_size_tester",
	["1"],
	[
		"./rsync_size_tester.py", "../rsync_size_tester.py",
		"../../rsync_size_tester.py",
		"/home/warrior/rsync_size_tester.py",
		"/usr/bin/rsync_size_tester.py",
	],
)

# Hackish, but run-pipeline does not accept extra command line arguments:
# the presence of a file named "LARGE-RSYNC" in the working directory opts
# in to the larger rsync limit.  Limits are decimal gigabytes (not
# gibibytes) to stay on the safe side.
MAX_RSYNC = "150000000000" if os.path.isfile("LARGE-RSYNC") else "25000000000"


Example #13
0
import shutil
import json

from tornado.httpclient import HTTPClient, HTTPRequest

from seesaw.project import *
from seesaw.item import *
from seesaw.config import *
from seesaw.task import *
from seesaw.pipeline import *
from seesaw.externalprocess import *
from seesaw.tracker import *

from seesaw.util import find_executable

# Probe for both external tools first; the results are validated below.
WGET_LUA = find_executable(
    'wget-lua',
    '1.14.lua.20130523-9a5c',
    ['./wget-lua', 'wget-lua'],
)
CURL = find_executable('curl', '7.2', ['curl'])

# Either tool missing aborts loading; wget-lua is reported first.
if not WGET_LUA:
    raise Exception("wget-lua cannot be found")
if not CURL:
    raise Exception("curl cannot be found")

# ----

# Project-wide constants.
DATA_DIR = "data"
USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0) Gecko/20100101 Firefox/23.0"
VERSION = "20130910.01"
TRACKER = "http://quilt.at.ninjawedding.org/patchy"
from seesaw.externalprocess import WgetDownload, ExternalProcess
from seesaw.pipeline import Pipeline
from seesaw.project import Project
from seesaw.util import find_executable


# Refuse to load under a seesaw release older than 0.8.5.
if StrictVersion(seesaw.__version__) < StrictVersion("0.8.5"):
    raise Exception("This pipeline needs seesaw version 0.8.5 or higher.")


###########################################################################
# Locate rsync: only /usr/bin/rsync is probed, against version "2.6.9".
RSYNC = find_executable("rsync", ["2.6.9"], ["/usr/bin/rsync"])

# The check below is commented out, so a falsy RSYNC does not stop the
# pipeline from loading at this point.
#if not RSYNC:
#    raise Exception("No usable rsync found.")


###########################################################################
# Version number of this pipeline definition.
#
# Bump it on every non-cosmetic change.
# It is added to the WARC files and reported to the tracker.
VERSION = "20150614.01"
USER_AGENT = 'ArchiveTeam'
TRACKER_ID = 'sourceforge-rsync'
sys.path.append(os.getcwd())
from config import *
from depcheck import *

checkDeps()

###########################################################################
# Locate the grabProject helper script.
#
GRAB_TEST = find_executable(
	"grabProject",
	["1"],
	[
		"./grabProject.py", "../grabProject.py", "../../grabProject.py",
		"/home/warrior/grabProject.py", "/usr/bin/grabProject.py",
	],
)


###########################################################################
# Version number of this pipeline definition.
#
# Bump it on every non-cosmetic change.
# It is added to the WARC files and reported to the tracker.
VERSION = "20151123.05"
USER_AGENT = 'ArchiveTeam'
TRACKER_ID = 'googlecodersync'
Example #16
0
# Refuse to load under a seesaw release older than 0.8.3.
if StrictVersion(seesaw.__version__) < StrictVersion("0.8.3"):
    raise Exception("This pipeline needs seesaw version 0.8.3 or higher.")


###########################################################################
# Find a useful Wpull executable.
#
# WPULL_EXE will be set to the first path that
# 1. does not crash with --version, and
# 2. prints the required version string
WPULL_EXE = find_executable(
    "Wpull",
    # The required version is given as a pattern rather than a fixed string.
    # "1.2" must sit between word boundaries, so text such as "1.2.3"
    # contains a match while "1.20" and "11.2" do not.
    re.compile(r"\b1\.2\b"),
    # Candidates: a local ./wpull, two per-user locations under ~/.local,
    # ./wpull_bootstrap, and finally the bare command name "wpull".
    [
        "./wpull",
        os.path.expanduser("~/.local/share/wpull-1.2/wpull"),
        os.path.expanduser("~/.local/bin/wpull"),
        "./wpull_bootstrap",
        "wpull",
    ],
)

# A falsy result means no candidate qualified; fail at module load time.
if not WPULL_EXE:
    raise Exception("No usable Wpull found.")


###########################################################################
# The version number of this pipeline definition.
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
Example #17
0
    raise Exception("This pipeline needs seesaw version 0.8.5 or higher.")


###########################################################################
# Locate a usable Wget+Lua executable.
#
# WGET_LUA becomes the first candidate path that
# 1. survives being run with --version, and
# 2. prints the required version string.
WGET_LUA = find_executable(
    "Wget+Lua",
    ["GNU Wget 1.14.lua.20130523-9a5c"],
    [
        "./wget-lua", "./wget-lua-warrior", "./wget-lua-local",
        "../wget-lua", "../../wget-lua",
        "/home/warrior/wget-lua", "/usr/bin/wget-lua",
    ],
)

# Nothing qualified: stop loading the pipeline.
if not WGET_LUA:
    raise Exception("No usable Wget+Lua found.")


###########################################################################
# The version number of this pipeline definition.
#
# Update this each time you make a non-cosmetic change.
Example #18
0
sys.path.append(os.getcwd())

from archivebot import control
from archivebot import shared_config
from archivebot.seesaw import extensions
from archivebot.seesaw import monitoring
from archivebot.seesaw.preflight import check_wpull_args
from archivebot.seesaw.wpull import WpullArgs
from archivebot.seesaw.tasks import GetItemFromQueue, StartHeartbeat, \
    SetFetchDepth, PreparePaths, WriteInfo, DownloadUrlFile, \
    RelabelIfAborted, MoveFiles, StopHeartbeat, MarkItemAsDone, CheckIP

VERSION = "20150424.01"
# PhantomJS release handed to the executable lookup below.
PHANTOMJS_VERSION = '1.9.8'
EXPIRE_TIME = 60 * 60 * 48  # 48 hours between archive requests

# External tools.  Wpull is looked up with a version of None (presumably
# "any version" -- confirm against find_executable).
WPULL_EXE = find_executable('Wpull', None, ['./wpull'])
PHANTOMJS = find_executable(
    'PhantomJS',
    PHANTOMJS_VERSION,
    ['phantomjs', './phantomjs', '../phantomjs'],
    '-v',
)

# Fold (major, minor) into a single integer, e.g. 3.3 -> 33.
version_integer = sys.version_info.major * 10 + sys.version_info.minor

assert version_integer >= 33, (
    "This pipeline requires Python >= 3.3.  You are running %s."
    % sys.version
)

# Setting NO_SEGFAULT_340 to a non-empty value skips the 3.4.0 check.
if not os.environ.get('NO_SEGFAULT_340'):
    assert sys.version_info[:3] != (3, 4, 0), (
        "Python 3.4.0 should not be used. It may segfault. "
        "Set NO_SEGFAULT_340=1 if your Python is patched. "
        "See https://bugs.python.org/issue21435"
    )
Example #19
0
# nice, though.
sys.path.append(os.getcwd())

from archivebot import control
from archivebot import shared_config
from archivebot.seesaw import extensions
from archivebot.seesaw import monitoring
from archivebot.seesaw.tasks import GetItemFromQueue, StartHeartbeat, \
    SetFetchDepth, PreparePaths, WriteInfo, DownloadUrlFile, \
    RelabelIfAborted, MoveFiles, SetWarcFileSizeInRedis, StopHeartbeat, \
    MarkItemAsDone


VERSION = "20140819.03"
EXPIRE_TIME = 60 * 60 * 48  # 48 hours between archive requests

# Single source of truth for the required PhantomJS release.  It feeds both
# the lookup and the failure message so the two cannot drift apart (the
# message used to say 1.9.0 while the lookup asked for 1.9.7).
PHANTOMJS_VERSION = '1.9.7'

# External tools.  Wpull is looked up with a version of None (presumably
# "any version" -- confirm against find_executable).
WPULL_EXE = find_executable('Wpull', None, [ './wpull' ])
PHANTOMJS = find_executable('PhantomJS', PHANTOMJS_VERSION,
        ['phantomjs', './phantomjs'], '-v')

# Fold (major, minor) into a single integer, e.g. 3.3 -> 33.
version_integer = (sys.version_info.major * 10) + sys.version_info.minor

# NOTE: these are assert statements, so running Python with -O skips them.
assert version_integer >= 33, \
        "This pipeline requires Python >= 3.3.  You are running %s." % \
        sys.version

assert WPULL_EXE, 'No usable Wpull found.'
assert PHANTOMJS, 'PhantomJS %s was not found.' % PHANTOMJS_VERSION
assert 'RSYNC_URL' in env, 'RSYNC_URL not set.'
assert 'REDIS_URL' in env, 'REDIS_URL not set.'

if StrictVersion(seesaw.__version__) < StrictVersion("0.1.8b1"):
Example #20
0
import json

from os import environ as env
from urlparse import urlparse
from seesaw.project import *
from seesaw.item import *
from seesaw.task import *
from seesaw.pipeline import *
from seesaw.externalprocess import *

from seesaw.util import find_executable

VERSION = "20131101.01"
# The user agent string embeds the pipeline version.
USER_AGENT = "ArchiveTeam ArchiveBot/%s" % VERSION
EXPIRE_TIME = 60 * 60 * 48  # 48 hours between archive requests

# Only ./wget-lua is probed, and it must match the archivebot build string.
WGET_LUA = find_executable(
    "Wget+Lua",
    "GNU Wget 1.14.0-archivebot1",
    ["./wget-lua"],
)

if not WGET_LUA:
    raise Exception("No usable Wget+Lua found.")

# Required environment settings; the first missing one aborts loading.
if "RSYNC_URL" not in env:
    raise Exception("RSYNC_URL not set.")
elif "REDIS_URL" not in env:
    raise Exception("REDIS_URL not set.")
elif "LOG_CHANNEL" not in env:
    raise Exception("LOG_CHANNEL not set.")

RSYNC_URL, REDIS_URL = env["RSYNC_URL"], env["REDIS_URL"]