def process_dir(delete_source, dir_src, bn_dir_src, dir_dst, dir_tmp):
    """Zip the root files of 'dir_src' into a temp archive under 'dir_tmp'."""
    logging.debug("Processing dir BEGIN: %s" % dir_src)
    zfile_path_tmp = tempfile.mktemp(dir=dir_tmp)
    logging.debug("Creating archive '%s'" % zfile_path_tmp)
    try:
        zfile = zipfile.ZipFile(zfile_path_tmp, "w")
        ## chdir so the archive members are stored relative to dir_src
        os.chdir(dir_src)
        code, files = get_root_files(dir_src, strip_dirname=True)
        if code != "ok":
            raise Exception("Can't get files from dir: %s" % dir_src)
        for fichier in files:
            zfile.write(fichier)
        zfile.close()
    except Exception as e:
        rm(zfile_path_tmp)
        raise Exception("Can't generate zip archive: %s (%s)" % (zfile_path_tmp, e))
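## 'get_root_files' is defined elsewhere in this codebase; the sketch below
## only illustrates the contract process_dir relies on -- a ("ok"/"error",
## file_list) tuple over the top-level files of a directory -- and is not
## the actual implementation.
import os

def get_root_files_sketch(dir_src, strip_dirname=False):
    try:
        entries = os.listdir(dir_src)
    except OSError:
        return ("error", [])
    files = [name if strip_dirname else os.path.join(dir_src, name)
             for name in entries
             if os.path.isfile(os.path.join(dir_src, name))]
    return ("ok", files)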
def run(dst_path=None, topic_name=None, queue_name=None,
        mod_sub=None, mod_pub=None, polling_interval=None,
        force=False, node_id=None, force_delete=False,
        delete_queue=None, proto_n=None, proto_m=None, **_):
    signal.signal(signal.SIGTERM, handlerSigTerm)

    ### STARTUP CHECKS ##################
    if proto_n > proto_m:
        raise Exception("Parameter 'n' must be smaller than 'm'")

    if os.path.isdir(dst_path):
        raise Exception("'dst_path' must be a filename, not a directory")

    if force_delete:
        logging.info("Attempting to delete '%s'" % dst_path)
        rm(dst_path)

    if os.path.isfile(dst_path):
        raise Exception("'dst_path' must not exist at startup: use -fd to delete")

    dir_path = os.path.dirname(dst_path)
    code, _msg = can_write(dir_path)
    if not code.startswith("ok"):
        raise Exception("directory '%s' is not writable" % dir_path)

    ### SETUP ###########################
    if node_id is None:
        node_id = str(uuid.uuid1())
    logging.info("Node id: %s" % node_id)

    proc = protocol_processor(node_id, proto_n, proto_m)

    ### START MAIN LOOP #################
    run_aws(node_id, proc, polling_interval, queue_name, topic_name, dst_path, delete_queue)
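## 'handlerSigTerm' (installed above) and the module-level 'sigtermReceived'
## flag it sets (polled by the main loop in run_aws) are defined elsewhere;
## this is a minimal sketch of the usual shape of such a handler, with
## '_sketch' names to mark it as an illustration:
sigtermReceived_sketch = False

def handlerSigTerm_sketch(signum, frame):
    ## only record the signal here; the main loop checks the flag between
    ## polls and performs the actual cleanup
    global sigtermReceived_sketch
    sigtermReceived_sketch = True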
def process_file(enable_progress_report, bucket_name, prefix, k, src_filename,
                 p_dst, enable_delete, propagate_error, write_done):
    uploaded = False
    ctx = {"src": src_filename, "key": k.name, "bucket": bucket_name, "prefix": prefix}

    # 1) Upload to S3
    try:
        k.set_contents_from_filename(src_filename)
        report(ctx, {"code": "ok", "kind": "upload"})
        if enable_progress_report:
            logging.info("progress: uploaded file %s" % src_filename)
        uploaded = True
        if write_done:
            do_write_done(src_filename)
    except:
        if propagate_error:
            report(ctx, {"code": "error", "kind": "upload"})
        return False

    # 2a) Delete
    if enable_delete:
        code, msg = rm(src_filename)
        if not code.startswith("ok"):
            logging.debug("Error deleting: %s (%s)" % (src_filename, msg))
            if not propagate_error:
                return True
        else:
            if enable_progress_report:
                logging.info("progress: deleted file %s" % src_filename)
        report(ctx, {"code": code, "kind": "delete"})

    # 2b) Move
    else:
        bname = os.path.basename(src_filename)
        dst_filename = os.path.join(p_dst, bname)
        code, msg = move(src_filename, dst_filename)
        if not code.startswith("ok"):
            ## one last chance... try recreating the dst directory for the next run
            mkdir_p(p_dst)
            logging.debug("Error moving: %s ==> %s (%s)" % (src_filename, dst_filename, msg))
            if not propagate_error:
                return True
        else:
            if enable_progress_report:
                logging.info("progress: moved file %s ==> %s" % (src_filename, dst_filename))
        report(ctx, {"code": code, "kind": "move", "dst": dst_filename})

    return uploaded
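## Hypothetical usage sketch: process_file expects an already-constructed
## boto S3 Key object 'k'. All names and paths below are made up for
## illustration; this is not part of the original code.
import boto
from boto.s3.key import Key

def upload_one_sketch():
    conn = boto.connect_s3()
    bucket = conn.get_bucket("my-bucket")      ## assumed bucket name
    k = Key(bucket)
    k.key = "some/prefix/report.csv"           ## assumed key name
    return process_file(enable_progress_report=True, bucket_name="my-bucket",
                        prefix="some/prefix", k=k,
                        src_filename="/tmp/report.csv", p_dst="/tmp/done",
                        enable_delete=False, propagate_error=True,
                        write_done=False)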
def do_common(mode_dir, bn_dir_src, del_dir_dst, dir_src, dir_dst, dir_tmp):
    dpath = maybe_del_dir(mode_dir, del_dir_dst, bn_dir_src, dir_dst)
    try:
        tdir = tempfile.mkdtemp(dir=dir_tmp)
    except:
        raise Exception("Can't create temporary directory")

    tpath = os.path.join(tdir, bn_dir_src)

    ## rename: in 'mode_dir' the extension is stripped from the basename
    if mode_dir:
        tpath = os.path.splitext(tpath)[0]

    logging.debug("Moving to temp dir: %s => %s" % (dir_src, tpath))
    code, _ = move(dir_src, tpath)
    if not code.startswith("ok"):
        rm(tdir)
        raise Exception("Can't move '%s' into a temp directory" % dir_src)
    return dpath, tdir, tpath
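## rm/move/touch in this codebase all return a ("ok"/"error", detail) tuple;
## they are defined elsewhere. A minimal sketch of 'move' under that
## convention, for illustration only:
import shutil

def move_sketch(src, dst):
    try:
        shutil.move(src, dst)
        return ("ok", None)
    except Exception as e:
        return ("error", str(e))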
def process(src_file, dst_file, enable_delete):
    """
    1. read the source file
    2. send "begin"
    3. send each line
    4. send "end"
    5. delete/move the source file
    """
    code, contents = file_contents(src_file)
    if not code.startswith("ok"):
        return ("error", "file/invalid")

    try:
        contents = contents.strip()
        lines = contents.split("\n")
    except:
        return ("error", "data/invalid")

    ###############################################
    try:
        stdout({"sp": src_file, "code": "begin"})
        for line in lines:
            stdout({"code": "line", "line": line})
        stdout({"sp": src_file, "code": "end"})
    except:
        raise BrokenPipe("Broken Pipe")

    ###############################################
    if enable_delete:
        code, _msg = rm(src_file)
        if not code.startswith("ok"):
            logging.error("Can't delete '%s'" % src_file)
            return ("error", "file/delete")
        return ("ok", None)

    ### well then, we need to move the source file
    code, _ = move(src_file, dst_file)
    return (code, "file/move")
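## 'stdout' above emits one JSON object per line; a minimal sketch, assuming
## plain JSON-lines framing (the helper name is hypothetical):
import json, sys

def stdout_sketch(d):
    sys.stdout.write(json.dumps(d) + "\n")
    sys.stdout.flush()

## for a 2-line source file /data/in.txt a downstream consumer would see:
##   {"sp": "/data/in.txt", "code": "begin"}
##   {"code": "line", "line": "first"}
##   {"code": "line", "line": "second"}
##   {"sp": "/data/in.txt", "code": "end"}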
def do_move(mode_dir, ext_done, del_dir_dst, dir_src, bn_dir_src, dir_dst, dir_tmp):
    """
    - Move the source directory to a temporary dir in 'dir_tmp'
    - Delete the 'file.ext_done' marker files
    - Move the directory to its final destination in 'dir_dst'
    """
    dpath, tdir, tpath = do_common(mode_dir, bn_dir_src, del_dir_dst, dir_src, dir_dst, dir_tmp)

    if not mode_dir:
        ## next, delete all files with extension 'ext_done'
        code, maybe_files = get_root_files(tpath)
        if not code.startswith("ok"):
            rmdir(tdir)
            raise Exception("Can't get the filenames of temp dir: %s" % tpath)

        liste = [path for path in maybe_files if path.endswith(ext_done)]
        logging.debug("Deleting %s files with extension '%s'" % (len(liste), ext_done))
        for f in liste:
            code, _ = rm(f)
            if not code.startswith("ok"):
                logging.warning("Can't delete file '%s'... aborting" % f)
                rmdir(tdir)
                return

    ## last, move to final destination
    logging.debug("Moving '%s' to final dir '%s'" % (tpath, dpath))
    code, _ = move(tpath, dpath)
    rmdir(tdir)
    logging.debug("Removed temp dir: %s" % tdir)
    if not code.startswith("ok"):
        raise Exception("Can't move '%s' to directory '%s'" % (tpath, dpath))
    logging.info("Processed directory: %s" % dir_src)
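## Hypothetical invocation, for illustration only (all paths made up):
## stage /data/in/job1 through /data/tmp, drop the '*.done' marker files,
## then land the directory under /data/out.
def example_do_move_sketch():
    do_move(mode_dir=False, ext_done=".done", del_dir_dst=True,
            dir_src="/data/in/job1", bn_dir_src="job1",
            dir_dst="/data/out", dir_tmp="/data/tmp")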
def process(src_file, dest_path, delete_fetch_error):
    """
    1. read file, extract URL
    2. fetch file from URL
    3. write fetched file to dest_path
    4. delete pointer file
    """
    code, contents = file_contents(src_file)
    if not code.startswith("ok"):
        logging.error("Can't read file contents from '%s'" % src_file)
        return

    try:
        url = contents.strip()
    except:
        raise Exception("Invalid data in file: %s" % src_file)

    code, (http_code, headers, data) = fetch(url)
    if not code.startswith("ok"):
        if delete_fetch_error:
            code, _msg = rm(src_file)
            logging.warning("Attempting to delete source file '%s': %s" % (src_file, code))
        raise Exception("Can't fetch page from url: %s" % url)

    try:
        http_code = int(http_code)
    except:
        pass
    if http_code != 200:
        logging.error("Can't fetch url '%s', http response code: %s" % (url, http_code))
        return

    code, maybe_components = extract_url_filename(url)
    if not code.startswith("ok"):
        fbn = str(uuid.uuid1())
        dest_filename = os.path.join(dest_path, fbn)
    else:
        fbn, fext = maybe_components
        dest_filename = os.path.join(dest_path, fbn) + fext

    try:
        exists = os.path.exists(dest_filename)
    except:
        exists = False

    ## never overwrite: fall back to a unique name if the target exists
    if exists:
        fbn = str(uuid.uuid1())
        dest_filename = os.path.join(dest_path, fbn)

    code, msg = atomic_write(dest_filename, data)
    if not code.startswith("ok"):
        raise Exception("Can't write to file '%s': %s" % (dest_filename, msg))

    ctx = {
        "dest_filename": dest_filename,
        "src_filename": src_file,
        "url": url,
        "http_code": http_code,
        "headers": headers,
    }

    ### the pointer file is no longer needed
    code, msg = rm(src_file)
    if not code.startswith("ok"):
        logging.error("Can't delete '%s': will probably cause excessive downloads..." % src_file)

    try:
        sys.stdout.write(json.dumps(ctx) + "\n")
    except:
        raise BrokenPipe()
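## 'atomic_write' is defined elsewhere; the name suggests the usual
## write-to-temp-then-rename pattern (rename is atomic on POSIX within one
## filesystem). A minimal sketch under that assumption, using the same
## ("ok"/"error", detail) convention:
import os, tempfile

def atomic_write_sketch(dest_filename, data):
    try:
        fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(dest_filename))
        with os.fdopen(fd, "wb") as f:
            f.write(data)
        os.rename(tmp_path, dest_filename)
        return ("ok", None)
    except Exception as e:
        return ("error", str(e))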
def run_aws(node_id, proc, polling_interval, queue_name, topic_name, dst_path, delete_queue):
    if topic_name is None:
        raise Exception("Need a topic_name")

    auto_queue = False
    if queue_name is None:
        auto_queue = True
        queue_name = gen_queue_name()

    def setup_private_queue():
        conn = boto.connect_sqs()
        q = conn.create_queue(queue_name)
        q.set_message_class(RawMessage)
        return (conn, q)

    # SETUP PRIVATE QUEUE
    logging.info("Creating queue '%s'" % queue_name)
    sqs_conn, q = retry(setup_private_queue, logmsg="Having trouble creating queue...")

    topic_arn = build_topic_arn(sqs_conn, topic_name)
    logging.info("topic_arn: %s" % topic_arn)

    ### create topic
    def create_topic():
        """
        Sample response:
        {'CreateTopicResponse': {'ResponseMetadata': {'RequestId': '5e2c6700-4dd0-11e1-b421-41716ce69b95'},
         'CreateTopicResult': {'TopicArn': 'arn:aws:sns:us-east-1:674707187858:election'}}}
        """
        snsconn = boto.connect_sns()
        snsconn.create_topic(topic_name)

    retry(create_topic, logmsg="Having trouble creating topic...")

    # SUBSCRIBE TO TOPIC
    def sub_topic():
        snsconn = boto.connect_sns()
        snsconn.subscribe_sqs_queue(topic_arn, q)
        return snsconn

    snsconn = retry(sub_topic, logmsg="Having trouble subscribing queue to topic...")
    logging.info("Subscribed to topic '%s'" % topic_name)

    current_state = "NL"
    MSGS = {"NL": "Leadership lost", "L": "Leadership gained", "ML": "Leadership momentum"}
    poll_count = 0

    def cleanup():
        rm(dst_path)
        if auto_queue or delete_queue:
            logging.info("... deleting queue")
            try:
                sqs_conn.delete_queue(q)
            except:
                pass

    ppid = os.getppid()
    logging.info("Process pid: %s" % os.getpid())
    logging.info("Parent pid: %s" % ppid)
    logging.info("Starting loop...")

    while True:
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            cleanup()
            break
        try:
            ## check whether SIGTERM was received since the last iteration
            global sigtermReceived
            if sigtermReceived:
                cleanup()
                raise SignalTerminate()

            #########################################
            try:
                ### BATCH PROCESS - required: drain the queue on each poll
                while True:
                    rawmsg = q.read()
                    if rawmsg is not None:
                        jsonmsg = json.loads(rawmsg.get_body())
                        q.delete_message(rawmsg)

                        ## SNS encapsulates the original message...
                        nodeid = str(jsonmsg["Message"])
                        transition, current_state = proc.send((poll_count, nodeid))
                        jstdout({"state": current_state})
                        if transition:
                            logging.info(MSGS[current_state])
                            if current_state == "L":
                                code, _ = touch(dst_path)
                                logging.info("Created '%s': %s" % (dst_path, code))
                            else:
                                code, _ = rm(dst_path)
                                logging.info("Deleted '%s': %s" % (dst_path, code))
                    else:
                        break
            except SQSDecodeError:
                logging.warning("Message decoding error")
            except Exception as e:
                logging.error(str(e))
                continue

            msg = str(node_id)
            logging.debug("Publishing our 'node id': %s" % node_id)
            try:
                snsconn.publish(topic_arn, msg)
            except:
                ## one retry before giving up on this iteration
                try:
                    snsconn.publish(topic_arn, msg)
                except:
                    logging.warning("Can't publish to topic '%s'" % topic_name)

            logging.debug("... sleeping for %s seconds" % polling_interval)
            sleep(polling_interval)
            poll_count = poll_count + 1

        except KeyboardInterrupt:
            cleanup()
            raise
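## 'retry' is used throughout run_aws but defined elsewhere; a minimal
## sketch, assuming it keeps calling the function until it stops raising,
## logging 'logmsg' and backing off between attempts (the 'delay' parameter
## is an assumption):
import time, logging

def retry_sketch(func, logmsg="retrying...", delay=5):
    while True:
        try:
            return func()
        except Exception:
            logging.warning(logmsg)
            time.sleep(delay)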