def crawl_status_monitor(batch_id, batch_run_id, server_details,
                             batch_name, team_name, logger):
        while (True):
            try:
                time.sleep(10)
                # logger.info("Running Batch.get_update_count")
                Batch.get_update_count(batch_id, server_details, batch_name,
                                       team_name, logger)
                # logger.info("Checking batch status paused/completed/exception")
                if essential.get_status(batch_id) in [
                        'paused', 'completed', 'exception', 'forced'
                ]:
                    logger.info("Completed. Exiting Monitor")
                    break

            except Exception as e:
                # print(e)
                logger.error(e)
 def clean_batch_process(batch_id, server_details, team_name, batch_name):
     ins_backup_paths = []
     for each_server_local in server_details:
         server_con_details = each_server_local
         server_name = server_con_details[0]
         # server_ip = server_con_details[1]
         # server_username = server_con_details[2]
         # server_password = server_con_details[3]
         # num_of_threads_for_this_server = server_con_details[4]
         server_batch_path = '\\\\' + str(
             server_name) + '\\e$\\Panacea\\Team_data\\' + str(
                 team_name) + '\\' + str(batch_name)
         # server_batch_out_file = '\\\\' + str(server_name) + '\\e$\\Panacea\\Team_data\\' + str(
         #     team_name_for_batch) + '\\' + str(batch_name_local) + '\\final_data.txt'
         # server_batch_input_file = '\\\\' + str(server_name) + '\\e$\\Panacea\\Team_data\\' + str(
         #     team_name_for_batch) + '\\' + str(batch_name_local) + '\\input_file.txt'
         # server_batch_input_crawled_file = '\\\\' + str(server_name) + '\\e$\\Panacea\\Team_data\\' + str(
         #     team_name_for_batch) + '\\' + str(batch_name_local) + '\\input_crawled.txt'
         # server_batch_status_file = '\\\\' + str(server_name) + '\\e$\\Panacea\\Team_data\\' + str(
         #     team_name_for_batch) + '\\' + str(batch_name_local) + '\\crawling_status.pbf'
         # server_batch_pnf_file = '\\\\' + str(server_name) + '\\e$\\Panacea\\Team_data\\' + str(
         #     team_name_for_batch) + '\\' + str(batch_name_local) + '\\pnf.txt'
         # server_batch_proxy_blocked_file = '\\\\' + str(server_name) + '\\e$\\Panacea\\Team_data\\' + str(
         #     team_name_for_batch) + '\\' + str(batch_name_local) + '\\proxy_blocked.txt'
         # server_batch_other_exception_file = '\\\\' + str(server_name) + '\\e$\\Panacea\\Team_data\\' + str(
         #     team_name_for_batch) + '\\' + str(batch_name_local) + '\\other_exception.txt'
         # server_batch_tag_failed_file = '\\\\' + str(server_name) + '\\e$\\Panacea\\Team_data\\' + str(
         #     team_name_for_batch) + '\\' + str(batch_name_local) + '\\tag_failed.txt'
         batch_backup_path = os.path.join(server_batch_path, 'backup')
         if not os.path.exists(batch_backup_path):
             os.makedirs(batch_backup_path)
         timestr = time.strftime("%Y%m%d-%H%M%S")
         ins_backup_path = os.path.join(batch_backup_path, timestr)
         ins_backup_paths.append(str(ins_backup_path))
         if not os.path.exists(ins_backup_path):
             os.makedirs(ins_backup_path)
         if os.path.exists(os.path.join(server_batch_path,
                                        'final_data.txt')):
             copy2(os.path.join(server_batch_path, 'final_data.txt'),
                   ins_backup_path)
             if not essential.get_status(batch_id) in ['paused', 'forced']:
                 os.remove(os.path.join(server_batch_path,
                                        'final_data.txt'))
         if os.path.exists(os.path.join(server_batch_path,
                                        'input_file.txt')):
             copy2(os.path.join(server_batch_path, 'input_file.txt'),
                   ins_backup_path)
             if not essential.get_status(batch_id) in ['paused', 'forced']:
                 os.remove(os.path.join(server_batch_path,
                                        'input_file.txt'))
         if os.path.exists(
                 os.path.join(server_batch_path, 'input_crawled.txt')):
             copy2(os.path.join(server_batch_path, 'input_crawled.txt'),
                   ins_backup_path)
             if not essential.get_status(batch_id) in ['paused', 'forced']:
                 os.remove(
                     os.path.join(server_batch_path, 'input_crawled.txt'))
         if os.path.exists(
                 os.path.join(server_batch_path, 'crawling_status.pbf')):
             copy2(os.path.join(server_batch_path, 'crawling_status.pbf'),
                   ins_backup_path)
             if not essential.get_status(batch_id) in ['paused', 'forced']:
                 os.remove(
                     os.path.join(server_batch_path, 'crawling_status.pbf'))
         if os.path.exists(os.path.join(server_batch_path, 'pnf.txt')):
             copy2(os.path.join(server_batch_path, 'pnf.txt'),
                   ins_backup_path)
             if not essential.get_status(batch_id) in ['paused', 'forced']:
                 os.remove(os.path.join(server_batch_path, 'pnf.txt'))
         if os.path.exists(
                 os.path.join(server_batch_path, 'proxy_blocked.txt')):
             copy2(os.path.join(server_batch_path, 'proxy_blocked.txt'),
                   ins_backup_path)
             if not essential.get_status(batch_id) in ['paused', 'forced']:
                 os.remove(
                     os.path.join(server_batch_path, 'proxy_blocked.txt'))
         if os.path.exists(
                 os.path.join(server_batch_path, 'other_exception.txt')):
             copy2(os.path.join(server_batch_path, 'other_exception.txt'),
                   ins_backup_path)
             if not essential.get_status(batch_id) in ['paused', 'forced']:
                 os.remove(
                     os.path.join(server_batch_path, 'other_exception.txt'))
         if os.path.exists(os.path.join(server_batch_path,
                                        'tag_failed.txt')):
             copy2(os.path.join(server_batch_path, 'tag_failed.txt'),
                   ins_backup_path)
             if not essential.get_status(batch_id) in ['paused', 'forced']:
                 os.remove(os.path.join(server_batch_path,
                                        'tag_failed.txt'))
         if os.path.exists(os.path.join(server_batch_path,
                                        'properties.pbf')):
             copy2(os.path.join(server_batch_path, 'properties.pbf'),
                   ins_backup_path)
             os.remove(os.path.join(server_batch_path, 'properties.pbf'))
     ins_backup_paths = "|".join(ins_backup_paths)
     essential.update_server_report(batch_id, ins_backup_paths)
     return ins_backup_paths
    def run_on_server(batch_id, batch_run_id, server, batch_name, team_name,
                      logger):
        # print(input_list)
        server_con_details = server
        server_name = server_con_details[0]
        server_ip = server_con_details[1]
        server_username = '******'
        server_password = '******'
        panacea_path = 'E:\\panacea\\Team_data'
        remote_path = '\\\\{}\\e$\\panacea\\Team_data'.format(server_ip)
        batch_path = os.path.join(panacea_path, str(team_name),
                                  str(batch_name))
        remote_batch_path = os.path.join(remote_path, str(team_name),
                                         str(batch_name))
        print(batch_path)
        # print('Starting server - ', str(server_name))
        pythoncom.CoInitialize()
        try:
            logger.info("Establishing connection to " + str(server_ip))
            print("Establishing connection to " + str(server_ip))
            connection = wmi.WMI(server_ip,
                                 user=server_username,
                                 password=server_password)
            # logger.info("Connection established")
            print("Connection established")
        except wmi.x_wmi as e:
            logger.error(str(e))
            logger.error("Your Username and Password of " +
                         getfqdn(server_ip) + " are wrong.")
            print("Your Username and Password of " + getfqdn(server_ip) +
                  " are wrong.")
            return
        try:
            working_directory = str(batch_path)
            process_id, result = connection.Win32_Process.Create(
                CommandLine='c:\\python36\\python spider.py ' +
                str(batch_run_id),
                CurrentDirectory=working_directory)
            print(process_id)
            logger.info("Process id - " + str(process_id))
            while True:
                time.sleep(2)
                process_found = paused_forced = False
                try:
                    status = essential.get_status(batch_id)
                    if status in ['paused', 'forced']:
                        paused_forced = True
                    # logger.info("fetching processes " + str(server_name))
                    all_process = connection.Win32_Process(
                        ProcessId=process_id)
                    # logger.info("processes :" + str(len(all_process)))
                    for process in all_process:
                        # print(process.ProcessId, process.Name)
                        if process_id == process.ProcessId:
                            process_found = True
                            # logger.info("Process running on " + str(server_name))
                            # print('checking')
                            if status == 'paused':
                                logger.info(
                                    "stopping thread process on server" +
                                    str(server_name))
                                print('stopping thread process on server' +
                                      str(server_name))
                                with open(
                                        os.path.join(remote_batch_path,
                                                     'properties.pbf'),
                                        'r+') as fr:
                                    data = fr.readlines()
                                    for i, line in enumerate(data):
                                        if line.split('=')[0] == 'stop':
                                            data[i] = 'stop=1'
                                with open(
                                        os.path.join(remote_batch_path,
                                                     'properties.pbf'),
                                        'w') as fr:
                                    fr.write(''.join([i for i in data]))
                            if status == 'forced':
                                process.Terminate()
                                # connection.Win32_Process(ProcessId=process_id).Terminate()
                                # process_id, result = connection.Win32_Process.Create(
                                #     CommandLine='cmd.exe /c taskkill /im python.exe /f',
                                #     CurrentDirectory=working_directory)
                                # process_id, result = connection.Win32_Process.Create(
                                #     CommandLine='cmd.exe /c taskkill /pid ' + str(process_id) + ' /f',
                                #     CurrentDirectory=working_directory)
                                time.sleep(2)

                            time.sleep(1)
                            break
                    completed = True
                    if not all_process and not paused_forced:
                        logger.info("Validating completion on server")
                        print("Validating completion on server")
                        input_url = general.read_csv(os.path.join(
                            remote_batch_path, 'input_file.txt'),
                                                     skip_header=True,
                                                     encoding='utf8')
                        crawled_url = general.read_csv(os.path.join(
                            remote_batch_path, 'input_crawled.txt'),
                                                       encoding='utf8')
                        for url in input_url:
                            if url not in crawled_url:
                                completed = False
                                logger.info("Not completed")
                                print("Not completed")
                                break
                        if not completed:
                            process_id, result = connection.Win32_Process.Create(
                                CommandLine='c:\\python36\\python spider.py ' +
                                str(batch_run_id),
                                CurrentDirectory=working_directory)
                            print(process_id)
                            logger.info(
                                "Resuming Failed batch. Process id - " +
                                str(process_id))
                    if not process_found and completed:
                        Batch.active_servers -= 1
                        logger.info("Process completed on " + str(server_name))
                        logger.info("Remaing Active server: " +
                                    str(Batch.active_servers))
                        break
                except Exception as e:
                    # print('Exception while looking for process in: ' + str(server_name) + str(e))
                    logger.info('Exception while looking for process in: ' +
                                str(server_name) + str(e))

            # try:
            #     process_id, result = connection.Win32_Process.Create(
            #         CommandLine='cmd.exe /c taskkill /im chrome.exe /f',
            #         CurrentDirectory=working_directory)
            #     process_id, result = connection.Win32_Process.Create(
            #         CommandLine='cmd.exe /c taskkill /im chromedriver.exe /f',
            #         CurrentDirectory=working_directory)
            # except Exception as e:
            #     print(e)
        except Exception as e:
            print(e)
            logger.error(e)
    def setup_environment(batch_id, server, input_list, team_name, batch_name,
                          script_path, proxies_path, batch_property,
                          num_of_attempts, time_out, logger):
        # We will use normal file transfer protocol to transfer the files between the servers
        print('setting up the environment')
        server_con_details = server
        server_name = server_con_details[0]
        num_of_threads_for_this_server = server_con_details[2]

        # Generating the server path of the current batch
        logger.info("Generating the server path of the current batch")
        panacea_path = '\\\\' + str(server_name) + '\\e$\\panacea'
        if not os.path.exists(panacea_path):
            os.makedirs(panacea_path)
        team_data_path = str(panacea_path) + '\\Team_data'
        if not os.path.exists(team_data_path):
            os.makedirs(team_data_path)
        team_path = os.path.join(team_data_path, str(team_name))
        if not os.path.exists(team_path):
            os.makedirs(team_path)
        batch_path = os.path.join(team_path, str(batch_name))
        if not os.path.exists(batch_path):
            os.makedirs(batch_path)

        # Writing new inputs and removing old files
        logger.info("Writing new inputs and removing old files")
        if not essential.get_status(batch_id) == 'resumed':
            print("Inside non resumed")
            if os.path.exists(os.path.join(batch_path, 'input_file.txt')):
                os.remove(os.path.join(batch_path, 'input_file.txt'))
            if os.path.exists(os.path.join(batch_path, 'crawling_status.pbf')):
                os.remove(os.path.join(batch_path, 'crawling_status.pbf'))
            if os.path.exists(os.path.join(batch_path, 'final_data.txt')):
                os.remove(os.path.join(batch_path, 'final_data.txt'))
            if os.path.exists(os.path.join(batch_path, 'input_crawled.txt')):
                os.remove(os.path.join(batch_path, 'input_crawled.txt'))
            if os.path.exists(os.path.join(batch_path, 'pnf.txt')):
                os.remove(os.path.join(batch_path, 'pnf.txt'))
            if os.path.exists(os.path.join(batch_path, 'proxy_blocked.txt')):
                os.remove(os.path.join(batch_path, 'proxy_blocked.txt'))
            if os.path.exists(os.path.join(batch_path, 'tag_failed.txt')):
                os.remove(os.path.join(batch_path, 'tag_failed.txt'))
            if os.path.exists(os.path.join(batch_path, 'other_exception.txt')):
                os.remove(os.path.join(batch_path, 'other_exception.txt'))
            if os.path.exists(os.path.join(batch_path, 'properties.pbf')):
                os.remove(os.path.join(batch_path, 'properties.pbf'))
            logger.info("Running essential.over_write_csv")
            essential.over_write_csv(
                os.path.join(batch_path, 'input_file.txt'), input_list)

        logger.info("Checking if we are running vbscript or python script")
        # Checking if we are running vbscript or python script
        if ".vbs" in str(script_path).lower():
            vb_spider = "E:\\Panacea\\files\\vb_spider.py"
            copy2(vb_spider, os.path.join(batch_path, 'spider.py'))
            copy2(script_path, os.path.join(batch_path, 'vbtopy.vbs'))
        else:
            copy2(script_path, os.path.join(batch_path, 'spider.py'))
        copy2(proxies_path, os.path.join(batch_path, 'proxy.pbf'))
        # copy2(status.property['main_file'], batch_path)
        # copy2(status.property['general_file'], batch_path)

        # batch_local_path = 'E:\\Panacea\\Team_data\\' + str(team_name) + '\\' + str(batch_name)
        batch_property_path = os.path.join(batch_path, 'properties.pbf')
        property_data_for_batch = []
        # property_data_for_batch.append(['project_name=' + str(batch_local_path)])
        # property_data_for_batch.append(['region=DE'])
        for key, val in batch_property.items():
            property_data_for_batch.append([str(key) + '=' + str(val)])
        property_data_for_batch.append(
            ['number_of_threads=' + str(num_of_threads_for_this_server)])
        # property_data_for_batch.append(['num_of_attempts=' + str(num_of_attempts)])
        # property_data_for_batch.append(['time_out=' + str(time_out)])
        logger.info("Running essential.over_write_csv")
        essential.over_write_csv(batch_property_path, property_data_for_batch)

        return batch_path.replace(panacea_path, 'E:\\Panacea')
    def start(batch_id,
              batch_run_id,
              batch_data,
              region,
              team_name,
              user_id,
              input_file_path,
              script_path,
              proxies_file_path,
              server_details,
              num_of_attempts,
              time_out,
              logger,
              encoding='utf-8'):
        try:
            batch_name = batch_data
            # log_path = os.path.join("E:\\panacea", "team_data", str(team_name), "Batches", str(batch_name), "logs",
            #                         str('batch.log'))
            # print(log_path)
            # logging.basicConfig(filename=log_path,
            #                     filemode='a',
            #                     format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
            #                     datefmt='%H:%M:%S',
            #                     level=logging.DEBUG)
            #
            # logger = logging.getLogger(str(user_id))
            startTime = datetime.datetime.now()
            logger.info("Batch_start_time - " + str(startTime))
            # print(status.batch_status[team_name][batch_name])
            # batch_log_file = status.current_path + '/Team_data/' + str(team_name) + '/Batches/' + str(batch_name) + '/logs/batch_logs.log'
            # batchlog = log_Writer(batch_log_file)
        except Exception as e:
            print(e)
            essential.write_file(
                str(status.current_path) + '/logs/' + str(team_name) + '.txt',
                str(e) + ' | ' + str(batch_data) +
                ' | Exception in start method while creating local folders and batch log object of batch.py'
                + str(datetime.datetime.now()))
            return

        try:
            print('------------- Starting Batch -------- ' + str(batch_name))
            logger.info("Starting batch")
            # batchlog.write('Starting Batch - ' + str(batch_name) + ' - ' + str(datetime.datetime.now()))  # Writing Batch logs
            # batch_properties_file = status.current_path + '/Team_data/' + str(team_name) + '/Batches/' + str(batch_name) + '/batch_properties.pbf'
            batch_property = {
                'region': str(region),
                'num_of_attempts': num_of_attempts,
                'time_out': time_out,
                'user_name': 'panacea',
                'resume_crawl': 'on',
                'encoding': encoding,
                'stop': 0
            }
            proxies_path = 'E:/Harsh/django_projects/panacea/media/' + proxies_file_path
            script_path = 'E:/Harsh/django_projects/panacea/media/' + script_path
            # server_details_path = status.current_path + '/Team_data/' + str(team_name) + '/Servers/' + batch_property['servers']
            logger.info(
                "Converting server dettails into list(essential.pipe_to_list)")
            server_details = essential.pipe_to_list(server_details)
            input_file_path = 'E:/Harsh/django_projects/panacea/media/' + input_file_path
            logger.info("distributing inputs(essential.distribute_input)")
            if not essential.get_status(batch_id) == 'resumed':
                essential.update_status(batch_id, "splitting")
            total_inputs, dist_inputs = essential.distribute_input(
                input_file_path, server_details, batch_id)

            # this thread will monitor the status for crawling on all the servers
            logger.info("Starting thread for status monitor")
            crawl_status_monitor_thread_name = 'crawl_status_monitor'
            t1 = threading.Thread(target=Batch.crawl_status_monitor,
                                  name=crawl_status_monitor_thread_name,
                                  args=[
                                      batch_id, batch_run_id, server_details,
                                      batch_name, team_name, logger
                                  ])
            t1.daemon = True
            t1.start()

            # updating batch status in database
            # status.batch_status = 'running'
            logger.info("Checking resumed/running batch condition")
            if not essential.get_status(batch_id) == 'resumed':
                essential.update_status(batch_id, "distributing")

            input_itr = 0
            server_threads = []
            logger.info("Checking if the script type is vbs")
            if ".vbs" in str(script_path).lower():
                py_script_path = os.path.join(
                    os.path.abspath(os.path.join(script_path, os.pardir)),
                    str(datetime.datetime.now().strftime("%d_%m_%y_%H_%M_%S"))
                    + ".vbs")
                logger.info("Running Batch.make_vb_compatible")
                Batch.make_vb_compatible(script_path, py_script_path)
                script_path = py_script_path
            logger.info("Setting up the environment in each server")

            for i, each_server in enumerate(server_details):
                # Setup the environment on each server
                if not essential.get_status(batch_id) == 'resumed':
                    essential.update_status(
                        batch_id,
                        "distributing-" + str(len(server_details) - i))
                logger.info("environment setup-" + str(each_server[0]))
                batch_path = Batch.setup_environment(
                    batch_id, each_server, dist_inputs[input_itr], team_name,
                    batch_name, script_path, proxies_path, batch_property,
                    num_of_attempts, time_out, logger)
                input_itr += 1
            logger.info("Checking if the batch status is resumed")
            essential.update_status(batch_id, "initiating")
            logger.info("Initiating run_on_server")
            for i, each_server in enumerate(server_details):
                essential.update_status(
                    batch_id, "initiating-" + str(len(server_details) - i))
                server_name = each_server[0]
                logger.info("Initiating process on-" + str(server_name))
                t = threading.Thread(target=Batch.run_on_server,
                                     name=server_name,
                                     args=[
                                         batch_id, batch_run_id, each_server,
                                         batch_name, team_name, logger
                                     ])
                t.daemon = True
                t.start()
                server_threads.append(t)
                Batch.active_servers += 1
            logger.info("batch is running")
            time.sleep(10)
            essential.update_status(batch_id, "running")
            logger.info("Joining")
            for t in server_threads:
                t.join()
            logger.info("Running exclusive Batch.get_update_count")
            Batch.get_update_count(batch_id, server_details, batch_name,
                                   team_name, logger)
            ins_backup_paths = Batch.clean_batch_process(
                batch_id, server_details, team_name, batch_name)
            # Batch.generate_report(ins_backup_paths, team_name, batch_name, batch_id)
            # updating batch status in database
            # status.batch_status = 'completed'
            print('batch - ' + str(batch_name) + ' for Team - ' +
                  str(team_name) + ' has Completed')
            logger.info("batch completed")
            # batchlog.write('Batch Completed' + ' - ' + str(batch_name) + ' - ' + str(datetime.datetime.now()))  # Writing Batch logs
            batch_state = essential.get_status(batch_id)
            essential.update_status(batch_id, "generating report")
            try:
                logger.info("Generationg Report for " + batch_name)
                print("Generationg Report for " + batch_name)
                crawl.tasks.generate_batch_report("", team_name, batch_name,
                                                  batch_id, batch_run_id)
            except Exception as e:
                logger.error('Report Generation Failed:' + str(e))
                print('Report Generation Failed:' + str(e))
            essential.update_status(batch_id, "analysing proxies")
            try:
                logger.info("Generationg Proxy Analysis for " + batch_name)
                print("Generationg Proxy Analysis for " + batch_name)
                Batch.get_proxy_analysis(batch_id, server_details, batch_name,
                                         team_name, logger)
                logger.info("Done!")
            except:
                logger.error('Proxy Analysis Failed:' + str(e))
                print('Proxy Analysis Failed:' + str(e))
            if batch_state == 'paused':
                essential.update_status(batch_id, 'paused')
            if batch_state == 'forced':
                essential.update_status(batch_id, 'forced')
            if not essential.get_status(batch_id) in [
                    'paused', 'stopped', 'forced'
            ]:
                essential.update_status(batch_id, 'completed')
        except Exception as e:
            print(e)
            logger.error(e)
            # batchlog.write(str(e) + ' | ' + str(batch_name) + ' | Exception in starting of batch in batch.py | ' + str(datetime.datetime.now()))  # Writing Batch logs
            # updating batch status in database
            # status.batch_status = 'exception'
            logger.error("Running essential.update_status")
            essential.update_status(batch_id, "exception")
            # status.batch_in_system[team_name].remove(batch_data)
            return