def test_find_executable_bad_version(self):
    """Expect a falsy result when a version string no runner reports is required."""
    candidates = ['./run-pipeline', '../run-pipeline']
    located = find_executable(
        'pipeline runner',
        '123-notrealversion',
        candidates,
        version_arg='--version'
    )
    self.assertFalse(located)
def test_find_executable(self):
    """Expect a truthy result when the installed seesaw version is required."""
    candidates = ['./run-pipeline', '../run-pipeline']
    located = find_executable(
        'pipeline runner',
        seesaw.__version__,
        candidates,
        version_arg='--version'
    )
    self.assertTrue(located)
def test_find_executable(self):
    """Expect a truthy result for the runner script matching the interpreter.

    The ``run-pipeline3`` candidates are probed when ``seesaw.six.PY3`` is
    truthy, the plain ``run-pipeline`` candidates otherwise.
    """
    candidates = (
        ['./run-pipeline3', '../run-pipeline3']
        if seesaw.six.PY3
        else ['./run-pipeline', '../run-pipeline']
    )
    located = find_executable(
        'pipeline runner',
        seesaw.__version__,
        candidates,
        version_arg='--version'
    )
    self.assertTrue(located)
def test_find_executable_bad_version(self):
    """Expect a falsy result when a bogus version string is required.

    The candidate script names carry a ``3`` suffix when ``seesaw.six.PY3``
    is truthy and no suffix otherwise.
    """
    suffix = '3' if seesaw.six.PY3 else ''
    candidates = ['./run-pipeline' + suffix, '../run-pipeline' + suffix]
    located = find_executable(
        'pipeline runner',
        '123-notrealversion',
        candidates,
        version_arg='--version'
    )
    self.assertFalse(located)
def test_find_executable_regex_version(self):
    """Expect a truthy result when the version is given as a compiled regex.

    The pattern is the installed seesaw version with every dot escaped, so
    that dots only match literal dots.
    """
    if seesaw.six.PY3:
        candidates = ['./run-pipeline3', '../run-pipeline3']
    else:
        candidates = ['./run-pipeline', '../run-pipeline']

    escaped_version = seesaw.__version__.replace('.', r'\.')
    version_pattern = re.compile(escaped_version)

    located = find_executable(
        'pipeline runner',
        version_pattern,
        candidates,
        version_arg='--version'
    )
    self.assertTrue(located)
# Make the current working directory importable so the archivebot package
# imported below can be resolved from it.
sys.path.append(os.getcwd())

from archivebot import control
from archivebot import shared_config
from archivebot.seesaw import extensions
from archivebot.seesaw import monitoring
from archivebot.seesaw.preflight import check_wpull_args
from archivebot.seesaw.wpull import WpullArgs
from archivebot.seesaw.tasks import (
    GetItemFromQueue, StartHeartbeat, SetFetchDepth, PreparePaths, WriteInfo,
    DownloadUrlFile, RelabelIfAborted, MoveFiles, StopHeartbeat,
    MarkItemAsDone, CheckIP,
)

VERSION = "20150715.01"
PHANTOMJS_VERSION = '1.9.8'
EXPIRE_TIME = 48 * 60 * 60  # 48 hours between archive requests

# External tools. None is passed as the version for Wpull and youtube-dl;
# presumably that disables the version check -- confirm against
# find_executable.
WPULL_EXE = find_executable('Wpull', None, ['./wpull'])
PHANTOMJS = find_executable(
    'PhantomJS',
    PHANTOMJS_VERSION,
    ['phantomjs', './phantomjs', '../phantomjs'],
    '-v'
)
YOUTUBE_DL = find_executable(
    'youtube-dl',
    None,
    ['./youtube-dl'],
    '--version'
)

# Fold major/minor into one integer (3.3 -> 33) for a single comparison.
version_integer = sys.version_info.major * 10 + sys.version_info.minor
assert version_integer >= 33, (
    "This pipeline requires Python >= 3.3. You are running %s." % sys.version
)

# Python 3.4.0 is rejected unless the operator sets NO_SEGFAULT_340.
if not os.environ.get('NO_SEGFAULT_340'):
    assert sys.version_info[:3] != (3, 4, 0), (
        "Python 3.4.0 should not be used. It may segfault. "
        "Set NO_SEGFAULT_340=1 if your Python is patched. "
        "See https://bugs.python.org/issue21435"
    )
raise Exception("This pipeline needs seesaw version 0.1.5 or higher.") ########################################################################### # Find a useful Wget+Lua executable. # # WGET_LUA will be set to the first path that # 1. does not crash with --version, and # 2. prints the required version string WGET_LUA = find_executable( "Wget+Lua", ["GNU Wget 1.14.lua.20130523-9a5c"], [ "./wget-lua", "./wget-lua-warrior", "./wget-lua-local", "../wget-lua", "../../wget-lua", "/home/warrior/wget-lua", "/usr/bin/wget-lua" ] ) if not WGET_LUA: raise Exception("No usable Wget+Lua found.") ########################################################################### # Determine if FFMPEG is available # Should probably utilize an ffmpeg build (or source) distributed from the # repo to avoid nasty API incompatibilities between FFMPEG versions. # However, if the options used are relatively simple, using distro-provided
raise Exception('This pipeline needs seesaw version 0.8.5 or higher.') ########################################################################### # Find a useful Wget+Lua executable. # # WGET_LUA will be set to the first path that # 1. does not crash with --version, and # 2. prints the required version string WGET_LUA = find_executable( 'Wget+Lua', ['GNU Wget 1.14.lua.20130523-9a5c', 'GNU Wget 1.14.lua.20160530-955376b'], [ './wget-lua', './wget-lua-warrior', './wget-lua-local', '../wget-lua', '../../wget-lua', '/home/warrior/wget-lua', '/usr/bin/wget-lua' ] ) if not WGET_LUA: raise Exception('No usable Wget+Lua found.') ########################################################################### # The version number of this pipeline definition. # # Update this each time you make a non-cosmetic change.
import json
from os import environ as env
from urlparse import urlparse

from seesaw.project import *
from seesaw.item import *
from seesaw.task import *
from seesaw.pipeline import *
from seesaw.externalprocess import *
from seesaw.util import find_executable

# Pipeline identity; the version is embedded in the user agent string.
VERSION = "20140119.01"
USER_AGENT = "ArchiveTeam ArchiveBot/%s" % VERSION
EXPIRE_TIME = 48 * 60 * 60  # 48 hours between archive requests

# Only the wget-lua binary in the current directory is considered.
WGET_LUA = find_executable(
    'Wget+Lua',
    "GNU Wget 1.14.0-archivebot1",
    ['./wget-lua']
)

if not WGET_LUA:
    raise Exception("No usable Wget+Lua found.")

# Required environment settings; fail early when any of them is missing.
if 'RSYNC_URL' not in env:
    raise Exception('RSYNC_URL not set.')

if 'REDIS_URL' not in env:
    raise Exception('REDIS_URL not set.')

if 'LOG_CHANNEL' not in env:
    raise Exception('LOG_CHANNEL not set.')

RSYNC_URL = env['RSYNC_URL']
REDIS_URL = env['REDIS_URL']
return path return None #--------------------------------------- ########################################################################### # Find a useful Wget+Lua executable. # # WGET_LUA will be set to the first path that # 1. does not crash with --version, and # 2. prints the required version string WGET_LUA = find_executable("Wget+Lua", [ "GNU Wget 1.14.lua.20130120-8476", "GNU Wget 1.14.lua.20130407-1f1d", "GNU Wget 1.14.lua.20130427-92d2", "GNU Wget 1.14.lua.20130523-9a5c" ], [ "./wget-lua", "./wget-lua-warrior", "./wget-lua-local", "../wget-lua", "../../wget-lua", "/home/warrior/wget-lua", "/usr/bin/wget-lua" ]) if not WGET_LUA: raise Exception("No usable Wget+Lua found.") ########################################################################### # The user agent for external requests. # # Use this constant in the Wget command line. USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.28"
from seesaw.config import *
from seesaw.item import *
from seesaw.task import *
from seesaw.pipeline import *
from seesaw.externalprocess import *
from seesaw.tracker import *
from seesaw.util import find_executable

# Candidate locations for the Wget+Lua binary are passed in this order;
# a single exact version string is required.
WGET_LUA = find_executable(
    "Wget+Lua",
    "GNU Wget 1.14.lua.20130120-8476",
    [
        "./wget-lua",
        "./wget-lua-warrior",
        "./wget-lua-local",
        "../wget-lua",
        "../../wget-lua",
        "/home/warrior/wget-lua",
        "/usr/bin/wget-lua",
    ],
)

# Abort when no candidate qualified.
if not WGET_LUA:
    raise Exception("No usable Wget+Lua found.")

USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27"
VERSION = "20130129.01"
# Refuse to run on a seesaw release older than 0.1.5.
if StrictVersion(seesaw.__version__) < StrictVersion("0.1.5"):
    raise Exception("This pipeline needs seesaw version 0.1.5 or higher.")


###########################################################################
# Find a useful rsync_size_tester executable.
#
RSYNC_TEST = find_executable(
    "rsync_size_tester",
    ["1"],
    [
        "./rsync_size_tester.py",
        "../rsync_size_tester.py",
        "../../rsync_size_tester.py",
        "/home/warrior/rsync_size_tester.py",
        "/usr/bin/rsync_size_tester.py"
    ]
)

# Hack: run-pipeline does not allow extra command line arguments, so a file
# named "LARGE-RSYNC" in the directory serves as the switch that allows
# larger rsyncs. The limits use gigabytes rather than gibibytes to stay on
# the safe side.
MAX_RSYNC = "150000000000" if os.path.isfile("LARGE-RSYNC") else "25000000000"
import shutil
import json

from tornado.httpclient import HTTPClient, HTTPRequest

from seesaw.project import *
from seesaw.item import *
from seesaw.config import *
from seesaw.task import *
from seesaw.pipeline import *
from seesaw.externalprocess import *
from seesaw.tracker import *
from seesaw.util import find_executable

# Locate both external tools first, then validate them in the same order.
WGET_LUA = find_executable(
    'wget-lua',
    '1.14.lua.20130523-9a5c',
    ['./wget-lua', 'wget-lua']
)
CURL = find_executable('curl', '7.2', ['curl'])

if not WGET_LUA:
    raise Exception("wget-lua cannot be found")

if not CURL:
    raise Exception("curl cannot be found")

# ----

DATA_DIR = "data"
USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0) Gecko/20100101 Firefox/23.0"
VERSION = "20130910.01"
TRACKER = "http://quilt.at.ninjawedding.org/patchy"
from seesaw.externalprocess import WgetDownload, ExternalProcess
from seesaw.pipeline import Pipeline
from seesaw.project import Project
from seesaw.util import find_executable

# Refuse to run on a seesaw release older than 0.8.5.
if StrictVersion(seesaw.__version__) < StrictVersion("0.8.5"):
    raise Exception("This pipeline needs seesaw version 0.8.5 or higher.")


###########################################################################
# Find a useful rsync executable
RSYNC = find_executable(
    "rsync",
    ["2.6.9"],
    ["/usr/bin/rsync"]
)

# The guard below is disabled, so nothing here stops the pipeline when the
# lookup above did not yield a usable rsync.
#if not RSYNC:
#    raise Exception("No usable rsync found.")


###########################################################################
# The version number of this pipeline definition.
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
VERSION = "20150614.01"
USER_AGENT = 'ArchiveTeam'
TRACKER_ID = 'sourceforge-rsync'
# Make the current working directory importable so the local config and
# depcheck modules imported below can be resolved from it.
sys.path.append(os.getcwd())

from config import *
from depcheck import *

# Run the dependency check before anything else is set up.
checkDeps()


###########################################################################
# Find a useful grabProject executable.
#
GRAB_TEST = find_executable(
    "grabProject",
    ["1"],
    [
        "./grabProject.py",
        "../grabProject.py",
        "../../grabProject.py",
        "/home/warrior/grabProject.py",
        "/usr/bin/grabProject.py"
    ]
)


###########################################################################
# The version number of this pipeline definition.
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
VERSION = "20151123.05"
USER_AGENT = 'ArchiveTeam'
TRACKER_ID = 'googlecodersync'
# Refuse to run on a seesaw release older than 0.8.3.
if StrictVersion(seesaw.__version__) < StrictVersion("0.8.3"):
    raise Exception("This pipeline needs seesaw version 0.8.3 or higher.")


###########################################################################
# Find a useful Wpull executable.
#
# WPULL_EXE will be set to the first path that
# 1. does not crash with --version, and
# 2. prints the required version string
#
# The required version is given as a regex: "1.2" delimited by word
# boundaries on both sides.
WPULL_EXE = find_executable(
    "Wpull",
    re.compile(r"\b1\.2\b"),
    [
        "./wpull",
        os.path.expanduser("~/.local/share/wpull-1.2/wpull"),
        os.path.expanduser("~/.local/bin/wpull"),
        "./wpull_bootstrap",
        "wpull",
    ],
)

if not WPULL_EXE:
    raise Exception("No usable Wpull found.")


###########################################################################
# The version number of this pipeline definition.
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
raise Exception("This pipeline needs seesaw version 0.8.5 or higher.") ########################################################################### # Find a useful Wget+Lua executable. # # WGET_LUA will be set to the first path that # 1. does not crash with --version, and # 2. prints the required version string WGET_LUA = find_executable( "Wget+Lua", ["GNU Wget 1.14.lua.20130523-9a5c"], [ "./wget-lua", "./wget-lua-warrior", "./wget-lua-local", "../wget-lua", "../../wget-lua", "/home/warrior/wget-lua", "/usr/bin/wget-lua" ] ) if not WGET_LUA: raise Exception("No usable Wget+Lua found.") ########################################################################### # The version number of this pipeline definition. # # Update this each time you make a non-cosmetic change.
# Make the current working directory importable so the archivebot package
# imported below can be resolved from it.
sys.path.append(os.getcwd())

from archivebot import control
from archivebot import shared_config
from archivebot.seesaw import extensions
from archivebot.seesaw import monitoring
from archivebot.seesaw.preflight import check_wpull_args
from archivebot.seesaw.wpull import WpullArgs
from archivebot.seesaw.tasks import (
    GetItemFromQueue, StartHeartbeat, SetFetchDepth, PreparePaths, WriteInfo,
    DownloadUrlFile, RelabelIfAborted, MoveFiles, StopHeartbeat,
    MarkItemAsDone, CheckIP,
)

VERSION = "20150424.01"
PHANTOMJS_VERSION = '1.9.8'
EXPIRE_TIME = 48 * 60 * 60  # 48 hours between archive requests

# External tools. None is passed as the Wpull version; presumably that
# disables the version check -- confirm against find_executable.
WPULL_EXE = find_executable('Wpull', None, ['./wpull'])
PHANTOMJS = find_executable(
    'PhantomJS',
    PHANTOMJS_VERSION,
    ['phantomjs', './phantomjs', '../phantomjs'],
    '-v'
)

# Fold major/minor into one integer (3.3 -> 33) for a single comparison.
version_integer = sys.version_info.major * 10 + sys.version_info.minor
assert version_integer >= 33, (
    "This pipeline requires Python >= 3.3. You are running %s." % sys.version
)

# Python 3.4.0 is rejected unless the operator sets NO_SEGFAULT_340.
if not os.environ.get('NO_SEGFAULT_340'):
    assert sys.version_info[:3] != (3, 4, 0), (
        "Python 3.4.0 should not be used. It may segfault. "
        "Set NO_SEGFAULT_340=1 if your Python is patched. "
        "See https://bugs.python.org/issue21435"
    )
# nice, though. sys.path.append(os.getcwd()) from archivebot import control from archivebot import shared_config from archivebot.seesaw import extensions from archivebot.seesaw import monitoring from archivebot.seesaw.tasks import GetItemFromQueue, StartHeartbeat, \ SetFetchDepth, PreparePaths, WriteInfo, DownloadUrlFile, \ RelabelIfAborted, MoveFiles, SetWarcFileSizeInRedis, StopHeartbeat, \ MarkItemAsDone VERSION = "20140819.03" EXPIRE_TIME = 60 * 60 * 48 # 48 hours between archive requests WPULL_EXE = find_executable('Wpull', None, [ './wpull' ]) PHANTOMJS = find_executable('PhantomJS', '1.9.7', ['phantomjs', './phantomjs'], '-v') version_integer = (sys.version_info.major * 10) + sys.version_info.minor assert version_integer >= 33, \ "This pipeline requires Python >= 3.3. You are running %s." % \ sys.version assert WPULL_EXE, 'No usable Wpull found.' assert PHANTOMJS, 'PhantomJS 1.9.0 was not found.' assert 'RSYNC_URL' in env, 'RSYNC_URL not set.' assert 'REDIS_URL' in env, 'REDIS_URL not set.' if StrictVersion(seesaw.__version__) < StrictVersion("0.1.8b1"):
import json
from os import environ as env
from urlparse import urlparse

from seesaw.project import *
from seesaw.item import *
from seesaw.task import *
from seesaw.pipeline import *
from seesaw.externalprocess import *
from seesaw.util import find_executable

# Pipeline identity; the version is embedded in the user agent string.
VERSION = "20131101.01"
USER_AGENT = "ArchiveTeam ArchiveBot/%s" % VERSION
EXPIRE_TIME = 48 * 60 * 60  # 48 hours between archive requests

# Only the wget-lua binary in the current directory is considered.
WGET_LUA = find_executable(
    "Wget+Lua",
    "GNU Wget 1.14.0-archivebot1",
    ["./wget-lua"]
)

if not WGET_LUA:
    raise Exception("No usable Wget+Lua found.")

# Required environment settings; fail early when any of them is missing.
if "RSYNC_URL" not in env:
    raise Exception("RSYNC_URL not set.")

if "REDIS_URL" not in env:
    raise Exception("REDIS_URL not set.")

if "LOG_CHANNEL" not in env:
    raise Exception("LOG_CHANNEL not set.")

RSYNC_URL = env["RSYNC_URL"]
REDIS_URL = env["REDIS_URL"]