def do_process(shared_job_q,shared_result_q,top_src_dir,top_dst_dir,operation):
    global g_process_start_time
    global g_process_start_readable_time
    file_list = []
    name_list = _get_samplename_list()
    print 'begin collecting file information...'
    for root,dirs,files in os.walk(top_src_dir):
        for name in files:
            src_file = os.path.join(root,name)
            file_list.append(src_file)
            
            if operation == OP_PREPROCESS:
                tail_path = src_file[len(top_src_dir):]
                if top_dst_dir[-1] != '/':
                    fet_file = top_dst_dir+'/'+tail_path
                else:
                    fet_file = top_dst_dir + tail_path
                fet_dir = fet_file[0:fet_file.rfind('/')]
                if not os.path.exists(fet_dir):
                    os.makedirs(fet_dir)
            elif operation == OP_MIST2VECTOR or operation == OP_MERGEFET:
                name_found = False
                for name in name_list:
                    if src_file.find(name)>=0:
                        name_found = True
                        break
                if not name_found:
                    print 'expect Sample name in full path: ', src_file
                    sys.exit()
            elif operation == OP_MERGETABLE:
                pass
            else:
                print 'unexpected operation: ',operation
                sys.exit()

    print '%d files to process...'%len(file_list)  
        
    if operation == OP_MERGETABLE:
        table_dict = {}
        for sub_table_file in file_list:
            pos = sub_table_file.rfind('.')
            if pos!=-1:
                table_name = sub_table_file[pos+1:]
            if table_dict.has_key(table_name):
                table_dict[table_name].append(sub_table_file)
            else:
                table_dict[table_name]=[sub_table_file]
        #chunk_size here is an average size
        num_files = len(file_list)
        num_jobs = len(table_dict)
        chunk_size = int(num_files/num_jobs)
        for (k,v) in table_dict.items():
            shared_job_q.put(v)
        print 'chunk_size=%d, num_jobs=%d,job_q size=%d' %(chunk_size,num_jobs,shared_job_q.qsize())
        if(shared_job_q.qsize() < 10):
            print 'job_q size is too small so unnecessary to run cluster. maybe something wrong??? check it:)'
            #sys.exit()
        chunk_processed = 0
        while chunk_processed < num_jobs:
            chunk_processed += shared_result_q.get()
            if g_process_start_time == 0:
                print 'setting process_start_time...'
                g_process_start_time = time.time()
                g_process_start_readable_time = common.get_readable_time()
            print 'chunk processed: ',chunk_processed
        print 'all chunks processed!'
        return
   
    chunk_size = common.g_chunk_size
    num_files = len(file_list)
    num_jobs = int(num_files/chunk_size);
    for i in range(0,num_jobs):
        shared_job_q.put(file_list[i*chunk_size:(i+1)*chunk_size])
    if num_jobs*chunk_size < num_files:
        shared_job_q.put(file_list[num_jobs*chunk_size:num_files])

    print 'chunk_size=%d, num_jobs=%d,job_q size=%d' %(chunk_size,num_jobs,shared_job_q.qsize())
    if(shared_job_q.qsize() < 10):
        print 'job_q size is too small so unnecessary to run cluster. maybe something wrong??? check it:)'
        sys.exit()
    
    files_processed = 0
    while files_processed < num_files:
        files_processed += shared_result_q.get()
        if g_process_start_time == 0:
            print 'setting process_start_time...'
            g_process_start_time = time.time()
            g_process_start_readable_time = common.get_readable_time()
        print '[%s] files processed: %d'%(common.get_readable_time(),files_processed)
    print 'all files processed!'
            # NOTE(review): from this line onward the text appears to be the
            # tail of a DIFFERENT function (likely the script entry point) --
            # the indentation does not continue do_process() above and the
            # locals (dir_of_target, listen_port, operation) are defined in
            # lines missing from this chunk. Left byte-identical; reconcile
            # against the full file.
            sys.exit()
        # dir_of_target must embed the sample-table name taken from
        # common.g_sample_table; otherwise abort.
        if dir_of_target.find(common.g_sample_table) == -1:
            print 'invalid dir_of_target: ',dir_of_target, ' must contains ', common.g_sample_table
            sys.exit()
        # Recreate dir_of_target as an empty directory. os.rmdir only removes
        # EMPTY directories, so a populated target is a fatal error here.
        if os.path.exists(dir_of_target): 
            try:
                os.rmdir(dir_of_target)
            except:
                print 'remove directory failed, any files in it? make sure. dir=',dir_of_target
                exit(-1)
        os.mkdir(dir_of_target)
    else:
        print 'unexpected operation: ',operation
        sys.exit()

    # Start the manager that serves the job/result queues, then run the whole
    # distribution loop in do_process() with those shared queues.
    manager = make_server_manager(listen_port,common.g_auth_key)
    shared_job_q = manager.get_job_q()
    shared_result_q = manager.get_result_q()

    do_process(shared_job_q,shared_result_q,dir_of_source,dir_of_target,operation)

    # g_process_start_time was set inside do_process when the first worker
    # result arrived, so this measures worker wall-clock time, not setup time.
    process_stop_time = time.time()
    print 'process time = %d'%(process_stop_time-g_process_start_time)
    print 'process started at ',g_process_start_readable_time,' stop at ',common.get_readable_time()
    time.sleep(1)
    print 'server shutting down...'
    manager.shutdown()