Example #1
0
def run(polling_interval=None, source_url=None,
        batch_size=None,
        format_json=None, propagate_error=None, check_path=None,
        **_):
    """Poll `source_url` forever and hand each successful fetch to the processor.

    The loop ends only when the parent process id changes (the original
    parent is gone).

    :param polling_interval: value passed to `sleep()` between iterations
    :param source_url: URL handed to `fetch()` and to `process()`
    :param batch_size: passed through to `process()`
    :param format_json: passed through to `process()`
    :param propagate_error: passed through to `process()`; when truthy, a
                            fetch failure is also reported through `stdout()`
    :param check_path: optional filesystem path; when given, fetching only
                       happens while this path exists
    :param _: extra keyword arguments are accepted and ignored
    """
    # NOTE(review): `process()` is expected to return an object supporting
    # `.send()` (it is used that way below) -- presumably an already-primed
    # coroutine; confirm against its definition.
    proc = process(source_url, propagate_error, format_json, batch_size)

    ct = None
    if check_path is not None:
        ct = check_transition()

    ppid = os.getppid()
    logging.info("Process pid: %s", os.getpid())
    logging.info("Parent pid: %s", ppid)
    logging.info("Starting loop...")
    while True:
        # A different parent pid means we have been re-parented.
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            # Only swallow ordinary errors: a bare `except:` would also hide
            # KeyboardInterrupt / SystemExit and make the loop unstoppable.
            try:
                exists = os.path.exists(check_path)
            except Exception:
                exists = False

            # "tr" presumably flags a state transition, so each change of
            # state is logged once rather than on every iteration.
            maybe_tr, _ = ct.send(exists)
            if maybe_tr == "tr":
                if exists:
                    logging.info("Check path: passed")
                else:
                    logging.info("Check path: failed - skipping")
        else:
            # No check path configured: behave as if the check always passes.
            exists = True

        if exists:
            status, (code, headers, data) = fetch(source_url)
            if status.startswith("ok"):
                proc.send((code, headers, data))
            elif propagate_error:
                stdout('''{"status":"error", "kind":"fetch", "source_url":"%s", "http_code":"%s"}''' % (source_url, code))

        logging.debug("...sleeping for %s seconds", polling_interval)
        sleep(polling_interval)
Example #2
0
def run(source_path=None, move_path=None, check_path=None,
        batch_size=5,
        polling_interval=None, enable_delete=False,
        **_):
    """Repeatedly process the root files of `source_path`, in small batches.

    Each file is handed to `process(src_file, dst_file, enable_delete)`.
    Files that cannot be written to, or whose processing reports a problem,
    are remembered and not retried for the lifetime of this call. The loop
    ends only when the parent process id changes.

    :param source_path: directory to scan (resolved through `resolve_path`)
    :param move_path: optional directory; when given, it is resolved, created
                      if needed, checked for writability, and used to build
                      the destination file name passed to `process()`
    :param check_path: optional filesystem path; when given, files are only
                       processed while this path exists
    :param batch_size: maximum number of files handled per iteration
    :param polling_interval: value passed to `sleep()` between iterations
    :param enable_delete: passed through to `process()`; mutually exclusive
                          with `move_path`
    :param _: extra keyword arguments are accepted and ignored
    :raises Exception: on conflicting options, or when a path cannot be
                       resolved, created or written to
    """
    ct = None
    if check_path is not None:
        ct = check_transition()

    if enable_delete and move_path is not None:
        raise Exception("Options '-mp' and '-d' are mutually exclusive")

    code, rp = resolve_path(source_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve source path '%s'" % source_path)
    source_path = rp

    if move_path is not None:
        code, rp = resolve_path(move_path)
        if not code.startswith("ok"):
            raise Exception("can't resolve 'move_path' '%s'" % move_path)
        move_path = rp

        logging.info("Creating (if necessary) 'move' path: %s", move_path)
        code, msg = mkdir_p(move_path)
        if not code.startswith("ok"):
            raise Exception("Can't create move path '%s': %s" % (move_path, str(msg)))

        logging.info("Checking if 'move' directory is writable")
        code, msg = can_write(move_path)
        if not code.startswith("ok"):
            raise Exception("Can't write to 'move' directory")

    # Files that already failed once; a set gives O(1) membership tests.
    to_skip = set()
    logging.info("Process pid: %s", os.getpid())
    ppid = os.getppid()
    logging.info("Parent pid : %s", ppid)
    logging.info("Starting loop...")
    while True:
        # A different parent pid means we have been re-parented.
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            # Only swallow ordinary errors: a bare `except:` would also hide
            # KeyboardInterrupt / SystemExit.
            try:
                exists = os.path.exists(check_path)
            except Exception:
                exists = False

            # "tr" presumably flags a state transition, so each change of
            # state is logged once rather than on every iteration.
            maybe_tr, _ = ct.send(exists)
            if maybe_tr == "tr":
                if exists:
                    logging.info("Check path: passed")
                else:
                    logging.info("Check path: failed - skipping")
        else:
            # No check path configured: behave as if the check always passes.
            exists = True

        if exists:
            code, files = get_root_files(source_path)
            if not code.startswith("ok"):
                logging.error("Can't get root files from %s", source_path)
            else:
                # Filter out known-bad files *before* applying the batch
                # limit; otherwise `batch_size` bad files at the head of the
                # listing would starve every file behind them forever.
                batch = [f for f in files if f not in to_skip][:batch_size]
                src_file = None
                try:
                    for src_file in batch:

                        # Guards against the same file being listed twice.
                        if src_file in to_skip:
                            continue

                        code, _ = can_write(src_file)
                        if not code.startswith("ok"):
                            to_skip.add(src_file)
                            logging.error("Would not be able to move/delete source file '%s'... skipping streaming" % src_file)
                            continue

                        dst_file = None
                        if move_path is not None:
                            bn = os.path.basename(src_file)
                            dst_file = os.path.join(move_path, bn)

                        code, maybe_error = process(src_file, dst_file, enable_delete)
                        if not code.startswith("ok"):
                            to_skip.add(src_file)
                            logging.warning("Problem processing file '%s': %s" % (src_file, maybe_error))
                except BrokenPipe:
                    raise
                except KeyboardInterrupt:
                    raise
                except Exception as e:
                    # Best effort: log and retry on the next polling cycle.
                    logging.error("processing file '%s': %s" % (src_file, str(e)))

        logging.debug("...sleeping for %s seconds", polling_interval)
        sleep(polling_interval)
Example #3
0
def run(primary_path=None, compare_path=None,
        dest_path=None,
        status_filename=None, check_path=None,
        just_basename=None,
        topic_name=None,
        exts=None,
        wait_status=None, polling_interval=None,
        just_zppp=None, just_ppzp=None, just_com=None,
        **_):
    """Periodically run `maybe_process()` over a primary and a compare path.

    The paths are resolved and created up front, a context dictionary is
    built for the helpers, and then the function loops until the parent
    process id changes.

    :param primary_path: primary directory (resolved and created if needed)
    :param compare_path: compare directory (resolved and created if needed)
    :param dest_path: optional destination directory (resolved and created)
    :param status_filename: file name joined onto the primary path to form
                            the status path; only used when `wait_status`
    :param check_path: optional filesystem path; when given, processing only
                       happens while this path exists
    :param just_basename: passed through to `maybe_process()`
    :param topic_name: stored in the context under "topic_name"
    :param exts: stored in the context under "exts"
    :param wait_status: when truthy, a status path is computed and passed to
                        `check_if_ok()`; otherwise `None` is passed
    :param polling_interval: value passed to `sleep()` between iterations
    :param just_zppp: stored in the context; contributes to "just_list"
    :param just_ppzp: stored in the context; contributes to "just_list"
    :param just_com: stored in the context; contributes to "just_list"
    :param _: extra keyword arguments are accepted and ignored
    :raises Exception: when a path cannot be resolved, or the destination
                       path cannot be created
    """
    ct = None
    if check_path is not None:
        ct = check_transition()

    # For each path, keep the caller's value until resolution has succeeded,
    # so that a failure message names the path that was actually requested.
    if dest_path:
        code, resolved = resolve_path(dest_path)
        if not code.startswith("ok"):
            raise Exception("can't resolve destination path '%s'" % dest_path)
        dest_path = resolved

        logging.info("Creating (if necessary) destination path: %s", dest_path)
        code, msg = mkdir_p(dest_path)
        if code != "ok":
            raise Exception("Can't create path: %s" % dest_path)

    code, resolved = resolve_path(primary_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve primary path '%s'" % primary_path)
    primary_path = resolved

    logging.info("Creating (if necessary) primary path: %s", primary_path)
    code, msg = mkdir_p(primary_path)
    if code != "ok":
        # Creation stays best effort (as before), but is no longer silent.
        logging.warning("Can't create primary path '%s': %s", primary_path, msg)

    code, resolved = resolve_path(compare_path)
    if not code.startswith("ok"):
        raise Exception("can't resolve compare path '%s'" % compare_path)
    compare_path = resolved

    logging.info("Creating (if necessary) compare path: %s", compare_path)
    code, msg = mkdir_p(compare_path)
    if code != "ok":
        # Creation stays best effort (as before), but is no longer silent.
        logging.warning("Can't create compare path '%s': %s", compare_path, msg)

    if wait_status:
        status_path = os.path.join(primary_path, status_filename)
        logging.info("Using status file path: %s", status_path)
    else:
        status_path = None

    ### context for logging etc.
    ctx = {
        "just_zppp": just_zppp,
        "just_ppzp": just_ppzp,
        "just_com": just_com,
        "just_list": just_zppp or just_ppzp or just_com,

        "pp": primary_path,
        "zp": compare_path,
        "sp": status_path,

        # Loggers pre-bound to their path; "up" uses `ilog`, "down" `wlog`.
        "pp_log": {"up": partial(ilog, primary_path),
                   "down": partial(wlog, primary_path)},
        "zp_log": {"up": partial(ilog, compare_path),
                   "down": partial(wlog, compare_path)},
        "topic_name": topic_name,
        "exts": exts,
    }

    ctx["tm"] = transition_manager(ctx)

    ppid = os.getppid()
    logging.info("Process pid: %s", os.getpid())
    logging.info("Parent  pid: %s", ppid)
    logging.info("Starting loop...")
    while True:
        # A different parent pid means we have been re-parented.
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            # Only swallow ordinary errors: a bare `except:` would also hide
            # KeyboardInterrupt / SystemExit.
            try:
                exists = os.path.exists(check_path)
            except Exception:
                exists = False

            # "tr" presumably flags a state transition, so each change of
            # state is logged once rather than on every iteration.
            maybe_tr, _ = ct.send(exists)
            if maybe_tr == "tr":
                if exists:
                    logging.info("Check path: passed")
                else:
                    logging.info("Check path: failed - skipping")
        else:
            # No check path configured: behave as if the check always passes.
            exists = True

        if exists:
            code, msg = check_if_ok(status_path, default="ok")
            maybe_process(ctx, code, msg, primary_path, compare_path, just_basename, dest_path)

        logging.debug("...sleeping for %s seconds", polling_interval)
        sleep(polling_interval)
Example #4
0
def run(source_path=None, dest_path=None, check_path=None,
        batch_size=5,
        polling_interval=None, delete_fetch_error=False,
        **_):
    """Repeatedly process the root files of `source_path`, in small batches.

    Each file is handed to `process(src_file, dest_path, delete_fetch_error)`.
    Between batches, lines arriving on stdin are copied to stdout for up to
    `polling_interval` seconds. The loop ends only when the parent process
    id changes.

    :param source_path: directory scanned through `get_root_files()`
    :param dest_path: destination directory, created if necessary
    :param check_path: optional filesystem path; when given, files are only
                       processed while this path exists
    :param batch_size: maximum number of files handled per iteration
    :param polling_interval: maximum wait, in seconds, between iterations
    :param delete_fetch_error: passed through to `process()`
    :param _: extra keyword arguments are accepted and ignored
    :raises Exception: when the destination path cannot be created
    """
    ct = None
    if check_path is not None:
        ct = check_transition()

    logging.info("Creating (if necessary) destination path: %s", dest_path)
    code, msg = mkdir_p(dest_path)
    if not code.startswith("ok"):
        raise Exception("Can't create destination path '%s': %s" % (dest_path, str(msg)))

    # Files whose writability check failed once; they are not retried.
    to_skip = []
    ppid = os.getppid()
    logging.info("Process pid: %s", os.getpid())
    logging.info("Parent pid : %s", ppid)
    logging.info("Starting loop...")
    while True:
        # A different parent pid means we have been re-parented.
        if os.getppid() != ppid:
            logging.warning("Parent terminated... exiting")
            break

        if check_path is not None:
            # Only swallow ordinary errors: a bare `except:` would also hide
            # KeyboardInterrupt / SystemExit.
            try:
                exists = os.path.exists(check_path)
            except Exception:
                exists = False

            # "tr" presumably flags a state transition, so each change of
            # state is logged once rather than on every iteration.
            maybe_tr, _ = ct.send(exists)
            if maybe_tr == "tr":
                if exists:
                    logging.info("Check path: passed")
                else:
                    logging.info("Check path: failed - skipping")
        else:
            # No check path configured: behave as if the check always passes.
            exists = True

        if exists:
            code, files = get_root_files(source_path)
            if not code.startswith("ok"):
                # Deliberately no `continue` here: skipping the wait below
                # would turn a persistent listing failure into a busy loop.
                logging.error("Can't get root files from %s", source_path)
            else:
                # Filter out known-bad files *before* applying the batch
                # limit; otherwise `batch_size` bad files at the head of the
                # listing would starve every file behind them forever.
                batch = [f for f in files if f not in to_skip][:batch_size]
                src_file = None
                try:
                    for src_file in batch:

                        # Guards against the same file being listed twice.
                        if src_file in to_skip:
                            continue

                        code, _ = can_write(src_file)
                        if not code.startswith("ok"):
                            to_skip.append(src_file)
                            logging.error("Would not be able to delete source file '%s'... skipping download" % src_file)
                            continue

                        process(src_file, dest_path, delete_fetch_error)
                except BrokenPipe:
                    raise
                except Exception as e:
                    # Best effort: log and retry on the next polling cycle.
                    logging.error("processing file '%s': %s" % (src_file, str(e)))

        logging.debug("...waiting for %s seconds (max)", polling_interval)

        ### Implement a "pass-through" for stdin --> stdout
        ###  whilst also handling a maximum time-out
        start_time = time.time()
        while True:
            if polling_interval is None:
                # No time-out configured: block until stdin is readable.
                timeout = None
            else:
                # Wait only for what is left of the interval, so that input
                # arriving late cannot stretch the cycle to ~2x the interval.
                timeout = max(0, polling_interval - (time.time() - start_time))

            ir, _w, _e = select.select([sys.stdin], [], [], timeout)
            if ir:
                iline = sys.stdin.readline()
                if iline:
                    sys.stdout.write(iline)
                else:
                    # EOF: stdin stays "readable", so select() would return
                    # immediately and spin; sleep out the rest of the cycle.
                    if polling_interval is not None:
                        time.sleep(max(0, polling_interval - (time.time() - start_time)))
                    break

            if polling_interval is None:
                break
            if time.time() - start_time >= polling_interval:
                break