Beispiel #1
0
def write_to_file(file_handle, lines):
    """
    Write the provided lines to the provided file_handle. If an IOError occurs an error message is logged
    instead of the exception being propagated.

    :type lines: list
    :type file_handle: file

    :param file_handle: the open file object that the provided lines should be written to.
    :param lines: the lines that should be written to the provided file. Nothing is written when this is empty.
    :return: void
    """
    if not file_handle or not isinstance(file_handle, file):
        logging.error('Received an invalid file reference.')
        return
    if not lines:
        return

    # NOTE(review): the brace-style placeholders with positional arguments assume that `logging` here is a
    # wrapper that formats with str.format -- confirm against the module's imports.
    try:
        if args.verbose:
            logging.info("Writing {0} lines to {1}", len(lines), file_handle.name)

        file_handle.writelines(lines)

        if args.verbose:
            logging.info("Successfully wrote {0} lines to {1}", len(lines), file_handle.name)

    except IOError as e:
        # An IOError is not guaranteed to carry (errno, strerror); guard the lookups so that the handler
        # itself cannot fail with an IndexError.
        logging.error('There was an error while writing to {0}. {1}: {2}',
                      file_handle.name,
                      e.args[0] if len(e.args) > 0 else '',
                      e.args[1] if len(e.args) > 1 else '')
Beispiel #2
0
def read_from_file(file_handle):
    """
    Read the contents from the provided file_handle and return them. If an IOError occurs a message detailing the
    problem is logged and None is returned.

    :type file_handle file

    :param file_handle: the file object to read from.
    :return: the contents of the file or, if the handle is invalid or an error is encountered, None.
    """
    if not (file_handle and isinstance(file_handle, file)):
        return None

    # NOTE(review): the brace-style placeholders with positional arguments assume that `logging` here is a
    # wrapper that formats with str.format -- confirm against the module's imports.
    if args.verbose:
        logging.info('Attempting to read in contents of {0}', file_handle.name)

    try:
        contents = file_handle.read()

        if args.verbose:
            logging.info('Successfully read the contents of {0}', file_handle.name)

        return contents
    except IOError as e:
        # An IOError is not guaranteed to carry (errno, strerror); guard the lookups so that the handler
        # itself cannot fail with an IndexError.
        logging.error('There was a problem reading from {0}. {1}: {2}',
                      file_handle.name,
                      e.args[0] if len(e.args) > 0 else '',
                      e.args[1] if len(e.args) > 1 else '')
        return None
Beispiel #3
0
def dataCleaning():
    """
    Run the data-cleaning stage on DATA_TRAINING_SAMPLE.

    The stage dumps, in order, the following intermediate files below
    TMP_DATA_DIR_PATH: 'topAdClickCnt.dict', 'ad2UsersGivenAdSet.dict',
    'user2AdGivenAd2User.dict' and 'userRawFeature.dict'. Each dump is read
    back from disk to build the ad/user set that feeds the next step.
    """
    logging.info('===Data Cleaning Processing===')
    input_file = DATA_TRAINING_SAMPLE
    top_ads_path = TMP_DATA_DIR_PATH + 'topAdClickCnt.dict'
    ad2users_path = TMP_DATA_DIR_PATH + 'ad2UsersGivenAdSet.dict'
    user2ad_path = TMP_DATA_DIR_PATH + 'user2AdGivenAd2User.dict'

    adClickCntList = generateTopAdsUsersByClick(input_file)
    dumpList2File(adClickCntList, top_ads_path)

    adSet = set()
    with open(top_ads_path) as top_ads:
        for line in top_ads:
            # Each line must hold exactly two whitespace-separated fields;
            # only the second (the ad id) is kept.
            _cnt, adid = line.strip().split()
            adSet.add(adid)
    logging.debug(len(adSet))

    ad2Users = generateAd2UsersGivenAdSet(input_file, adSet)
    dumpDict2File(ad2Users, ad2users_path)
    userDict = generateUser2AdGivenAd2User(ad2users_path, adViewThreshold=10)
    dumpDict2File(userDict, user2ad_path)

    userSet = set()
    with open(user2ad_path) as user2ad:
        for line in user2ad:
            # Records are '\x01'-separated pairs; only the first field is kept.
            user, _ads = line.strip().split('\x01')
            userSet.add(user)
    # Logged after the set is populated (it was previously logged while
    # still empty, so it always reported 0).
    logging.debug(len(userSet))

    dumpUserRawFeatureGivenUserSet(input_file, userSet,
                                   TMP_DATA_DIR_PATH + 'userRawFeature.dict')
Beispiel #4
0
def featureEngineering():
    """
    Run the feature-engineering stage.

    Steps, all on files below TMP_DATA_DIR_PATH:
      1. expand 'userRawFeature.dict' into 'userRawExpandTokens.dict';
      2. copy the second '\\x01'-separated field of every expanded record,
         one per line, into the scratch file 'tmp';
      3. run topicLDA.LDA on the scratch file with 200 topics;
      4. delete the scratch file;
      5. join the LDA output with 'ad2userStatus.dict' into
         'finalData4SVMRanking.dat'.
    """
    logging.info('===Feature Engineering Processing===')
    query_set, desc_set, title_set = getUserFeatureSet()
    aggregateUserfile = TMP_DATA_DIR_PATH + 'userRawFeature.dict'
    expandId2TokensResultFile = TMP_DATA_DIR_PATH + 'userRawExpandTokens.dict'
    expandFeatureId2Tokens(aggregateUserfile, expandId2TokensResultFile,
                           query_set, desc_set, title_set)

    tmp_path = TMP_DATA_DIR_PATH + 'tmp'
    # Context managers guarantee both handles are closed even when a record
    # does not split into exactly four fields.
    with open(tmp_path, 'w') as tmp_file, \
            open(expandId2TokensResultFile) as expanded:
        for line in expanded:
            _userid, query, _title, _desc = line.strip().split('\x01')
            tmp_file.write(query)
            tmp_file.write('\n')

    lda = topicLDA.LDA(tmp_path)
    lda.run(num_topics=200,
            fn_bow=TMP_DATA_DIR_PATH + 'corpus.svmlight',
            fn_out_topic=TMP_DATA_DIR_PATH + 'LDA_corpus.svmlight')
    # Remove the scratch file without going through a shell; stays
    # best-effort like the original `rm` call.
    try:
        os.remove(tmp_path)
    except OSError as err:
        logging.warning('Could not remove temporary file %s: %s', tmp_path, err)

    joinResult4SVMRanking(
        fn_trainFeature=TMP_DATA_DIR_PATH + 'LDA_corpus.svmlight',
        fn_ad2userStatus=TMP_DATA_DIR_PATH + 'ad2userStatus.dict',
        fn_out_SVMRanking=TMP_DATA_DIR_PATH + 'finalData4SVMRanking.dat')
Beispiel #5
0
def run():
    """
    Run feature extraction for every ad id returned by the pre-set lookup.

    Each ad id is handed to `featureExtracter.workflow` three times, with the
    flag pairs (False, False), (True, False) and (True, True), in that order.
    """
    ad_ids, _user_ids = userStatusWorkflow.getPreSet(DATA_TRAINING)
    flag_pairs = ((False, False), (True, False), (True, True))
    for adid in ad_ids:
        logging.info('Now handling AD:' + adid)
        for first_flag, second_flag in flag_pairs:
            featureExtracter.workflow(adid, first_flag, second_flag)
Beispiel #6
0
def monitor_publish_request_count(p, t_pstart, append_if_p_killed):
    """
    Poll the module-level `request_count` every 10 seconds and kill the
    process group of `p` when the count is still zero after the warm-up period.

    :param p: process object whose `pid` identifies the process group to kill.
    :param t_pstart: naive `datetime` at which `p` was started.
    :param append_if_p_killed: list used as a flag; a non-empty list makes the
        loop exit, and "dummy" is appended right before this function kills
        the process group.
    :return: None, once the loop exits.
    """
    warm_up_seconds = 60 * 10

    while True:
        if append_if_p_killed:
            logging.warning(
                '(monitor_publish_request_count) the process is to be restarted. breaking out of the publish request monitoring loop'
            )
            break

        logging.info(
            '(monitor_publish_request_count) request_count: {c}'.format(
                c=request_count))
        # total_seconds() is used because timedelta.seconds wraps around every
        # 24 hours, which made a long-running process look like it was back
        # in its warm-up period.
        elapsed = (datetime.datetime.now() - t_pstart).total_seconds()
        if request_count == 0:
            if elapsed < warm_up_seconds:
                logging.info(
                    '(monitor_publish_request_count) request count is zero but still in the warm up period: {s} seconds'
                    .format(s=int(elapsed)))
            else:
                logging.warning(
                    '(monitor_publish_request_count) request count is zero')
                try:
                    logging.warning(
                        '(monitor_publish_request_count) restarting as publish req count is zero after warnup'
                    )
                    append_if_p_killed.append("dummy")
                    # Send the signal to the whole process group of `p`.
                    os.killpg(os.getpgid(p.pid), signal.SIGKILL)
                except Exception:
                    # Best effort: record the failure and stop monitoring anyway.
                    logging.exception(
                        '(monitor_publish_request_count) failed to kill the process group')
                break
        time.sleep(10)
def rankingFeatureSelection():
    """
    Build the ranking input files for the non-blacklisted top ads.

    Reads the ad ids (second whitespace-separated field of each line) from
    'topAdClickCnt.dict.final', drops the hard-coded blacklist, writes an
    ad-id -> 1-based index table to 'ranking/adid2idx.txt', and dumps the
    selected features of the three per-ad feature files ('bm25.feature',
    '.transfer', '.transfer.relevance') into the matching 'ranking/' outputs.
    All paths are relative to TMP_DATA_DIR_PATH.
    """
    with open(TMP_DATA_DIR_PATH + 'topAdClickCnt.dict.final') as top_ads:
        adset = set(line.strip().split()[1] for line in top_ads)
    blacklist = set(['20174985', '3834142', '3373964', '4344041', '8350700',
                     '2878230', '3803920', '20174982', '4341158', '6434934',
                     '3219148', '20035409'])
    adset = adset - blacklist
    feature = TMP_DATA_DIR_PATH + 'feature/%s.bm25.feature'
    featureTransfer = TMP_DATA_DIR_PATH + 'feature/%s.bm25.feature.transfer'
    featureTransferRelevance = TMP_DATA_DIR_PATH + 'feature/%s.bm25.feature.transfer.relevance'

    fn_status_tpl = TMP_DATA_DIR_PATH + 'status/%s.ad2userStatus.dat'
    ranking_dir = TMP_DATA_DIR_PATH + 'ranking/'

    # The output files are opened up front, as before, but are now
    # guaranteed to be flushed and closed when the function exits.
    with open(ranking_dir + 'bm25.ranking', 'w') as selectionFeature, \
            open(ranking_dir + 'bm25.ranking.transfer', 'w') as selectionFeatureTransfer, \
            open(ranking_dir + 'bm25.ranking.transfer.relevance', 'w') as selectionFeatureTransferRelevance:

        logging.info('Dumping adid2idx')
        seeduserDict = {}
        with open(ranking_dir + 'adid2idx.txt', 'w') as writer:
            for idx, adid in enumerate(adset, 1):
                writer.write('%s\t%d\n' % (adid, idx))
                rset, nrset = getSeedUserSet(fn_status_tpl % adid, adid)
                seeduserDict[adid] = rset.union(nrset)

        dumpSelectedFeature(adset, seeduserDict, feature, selectionFeature)
        dumpSelectedFeature(adset, seeduserDict, featureTransfer, selectionFeatureTransfer)
        dumpSelectedFeature(adset, seeduserDict, featureTransferRelevance, selectionFeatureTransferRelevance)
Beispiel #8
0
def generate_default_templates(resources):
    """
    Create the default templates per resource that was retrieved from the `modw`.`resourcefact` table.

    For every resource one template is created per queue type ('slurm' and 'pbs'), unless `args.test` is set,
    in which case nothing is created.

    :param resources: a tuple of (string,int) [name, id] retrieved from the `modw`.`resourcefact` table.
    :return: void
    """
    # Guard clause: nothing to do without resources.
    if not resources:
        logging.warning("No resources found. No files to create.")
        return

    template_path = os.path.join(akrr.curdir, 'templates', 'template.{0}.inp.py')
    queues = {
        'slurm': retrieve_queue_template(template_path, 'slurm'),
        'pbs': retrieve_queue_template(template_path, 'pbs'),
    }

    for resource in resources:
        name = resource[0]
        if args.verbose:
            logging.info("Creating Resource Template: {0} ", name + "")

        if args.test:
            continue

        # The target path is the same for every queue of this resource.
        file_path = os.path.join(resources_dir, name + 'resource.inp.py')
        for queue, contents in queues.items():
            create_resource_template(file_path, queue, contents)

    logging.info("Resource Template Generation Complete!")
Beispiel #9
0
def capture_scenario_params(scenario, ip_port):
    """
    Run the parameter slave for `scenario` in capture mode and collect the
    parameters it reports.

    :param scenario: scenario name passed to 'run_param_slave.py'.
    :param ip_port: address handed to the `Receiver` that collects the
        reported objects.
    :return: a `CartesianEngine` with one sampler added per received object.
    """
    def launch_slaves():
        # Start the slave process and block until it exits.
        command = [
            './dev/venv/bin/python', 'run_param_slave.py', '-scenario',
            scenario, '-stop_frame', '0', '-no_logging', '-headless',
            protocol.k_arg_capture,
        ]
        subprocess.Popen(command).wait()

    def receive_param_loop(param_engine):
        # Not called anywhere in this function; receives until interrupted
        # from the keyboard, then stops the receiver.
        try:
            while True:
                handle_received_params(receiver.receive_objects(),
                                       param_engine)
        except KeyboardInterrupt:
            receiver.stop()

    def handle_received_params(objects, param_engine):
        # Register every captured parameter and log the running product size.
        for captured in objects:
            name = captured['name']
            space = captured['space']
            default = captured['dflt']
            logging.info(" Captured '{}' '{}' '{}'".format(
                name, space, default))
            param_engine.add_sampler(name, space, default)
            logging.info(
                f" Total Combination count {param_engine.product_size}")

    logging.info(f"====== Capturing '{scenario}' scenario parameters ======")
    receiver = Receiver(ip_port)
    launch_slaves()
    engine = CartesianEngine()
    handle_received_params(receiver.receive_objects(), engine)
    receiver.stop()
    return engine
Beispiel #10
0
def off_parsed(args):
    """
    Handles the appropriate execution of an 'Off' mode request given
    the provided command line arguments.

    Sends a PUT to '/resources/<resource>/off' and logs the outcome; errors
    raised while talking to the REST API are logged, not propagated.
    """
    data = {'application': args.application or ''}

    try:
        result = akrrrestclient.put(
            '/resources/{0}/off'.format(args.resource),
            data=data)

        if result.status_code != 200:
            log.error(
                'something went wrong. {0}:{1}',
                result.status_code,
                result.text)
        elif args.application and args.resource:
            # A single application on a resource was disabled.
            log.info('Successfully disabled {0} -> {1}.\n{2}',
                     args.application, args.resource, result.text)
        else:
            log.info('Successfully disabled all applications on {0}.\n{1}',
                     args.resource, result.text)
    except StandardError as e:
        first_arg = e.args[0] if len(e.args) > 0 else ''
        second_arg = e.args[1] if len(e.args) > 1 else ''
        log.error('''
            An error occured while communicating
            with the REST API.
            {0}: {1}
            ''',
                  first_arg,
                  second_arg)
Beispiel #11
0
def open_file(file_path, privs):
    """
    Open and return the file handle to the file identified by the provided 'file_path' with the provided 'privs'.
    If an IOError occurs during the opening of the file then it will be logged and 'None' will be returned.

    :type file_path str
    :type privs str

    :param file_path: the path to be opened.
    :param privs: the mode string with which to open the provided file path.
    :return: a file handle ( object ) if the open operation is successful else None. None is also returned when
             either argument is empty or not a string.
    """
    if not (file_path and isinstance(file_path, basestring) and privs and isinstance(privs, basestring)):
        return None

    # NOTE(review): the brace-style placeholders with positional arguments assume that `logging` here is a
    # wrapper that formats with str.format -- confirm against the module's imports.
    if args.verbose:
        logging.info("Opening with privs [{0}]: {1}", privs, file_path)

    try:
        file_handle = open(file_path, privs)

        if args.verbose:
            logging.info('Successfully preformed open [{0}] on {1}', privs, file_path)

        return file_handle
    except IOError as e:
        # An IOError is not guaranteed to carry (errno, strerror); guard the lookups so that the handler
        # itself cannot fail with an IndexError.
        logging.error('Unable to open file: {0} due to {1}: {2}',
                      file_path,
                      e.args[0] if len(e.args) > 0 else '',
                      e.args[1] if len(e.args) > 1 else '')
        return None
Beispiel #12
0
def query_parsed(args):
    """
    Handles the appropriate execution of a 'Query' mode request given
    the provided command line arguments.

    Retrieves either all resources or the one named by `args.resource`, lists
    them, and inserts them unless `args.dryrun` is set.
    """
    dry_run = args.dryrun
    verbose = args.verbose
    resource = args.resource
    exact = args.exact

    def handle_results(results, dry_run):
        # List every result; only a real run goes on to insert them.
        if dry_run:
            log.info("Would have inserted the following:")
        else:
            log.info("Inserting the following:")
        for r in results:
            log.info("Id: {0:<9}Name: {1}", r['id'], r['name'])
        if not dry_run:
            insert_resources(results)

    if verbose:
        log.info("Retrieving requested resource(s)...")

    if resource:
        results = retrieve_resource(resource, exact)
    else:
        results = retrieve_resources()
    handle_results(results, dry_run)

    if verbose:
        log.info("Requested Operation Completed")
Beispiel #13
0
def run():
    """
    Extract features for every ad id of the training pre-set.

    `featureExtracter.workflow` is invoked three times per ad id, with the
    flag combinations (False, False), (True, False) and (True, True).
    """
    ad_ids, _unused_users = userStatusWorkflow.getPreSet(DATA_TRAINING)
    for adid in ad_ids:
        logging.info('Now handling AD:' + adid)
        for flags in ((False, False), (True, False), (True, True)):
            featureExtracter.workflow(adid, *flags)
Beispiel #14
0
 def handle_received_params(objects, param_engine):
     """
     Register every received parameter with `param_engine`.

     Each item of `objects` is indexed by 'name', 'space' and 'dflt'; the
     three values are logged and passed to `param_engine.add_sampler`, after
     which the engine's current `product_size` is logged.
     """
     for captured in objects:
         name = captured['name']
         space = captured['space']
         default = captured['dflt']
         logging.info(" Captured '{}' '{}' '{}'".format(name, space, default))
         param_engine.add_sampler(name, space, default)
         logging.info(
             f" Total Combination count {param_engine.product_size}")
Beispiel #15
0
def create_resource_template(file_path, queue, contents):
    """
    Fill a resource template with the current module-level settings and write it out.

    Every line of `contents` of the form `<name> = ...` is rewritten, for `ppn` (unquoted) and for the
    connection / directory settings listed below (double-quoted), to the value of the module global with
    the same name. In test mode (`args.test`) the result is printed instead of written.

    :type file_path str
    :type queue str

    :param file_path: output path; it is passed through `str.format` with `queue` as the only argument.
    :param queue: the queue name substituted into `file_path`.
    :param contents: the template text to fill in.
    :return: void
    """

    privs = 'w'
    output_path = file_path.format(queue)

    def update_template(s, variable, inQuotes=True):
        """Return `s` with each `variable = ...` line replaced by the current value of the global `variable`."""
        pattern = re.compile(r'^' + re.escape(variable) + r'\s*=\s*.*$')
        value = globals()[variable]
        if value is None:
            rendered = 'None'
        elif inQuotes:
            rendered = '"' + str(value) + '"'
        else:
            rendered = str(value)
        replacement = variable + ' = ' + rendered
        # A callable replacement is used so that backslashes in the value (e.g. in a password or a path)
        # are inserted literally instead of being interpreted as regex escapes / group references.
        return "\n".join(pattern.sub(lambda _match: replacement, line) for line in s.splitlines())

    contents = update_template(contents, 'ppn', inQuotes=False)
    for v in ['remoteAccessNode', 'remoteAccessMethod', 'remoteCopyMethod',
              'sshUserName', 'sshPassword', 'sshPrivateKeyFile', 'sshPrivateKeyPassword',
              'networkScratch', 'localScratch', 'akrrData', 'appKerDir', 'batchScheduler']:
        contents = update_template(contents, v)
    contents += "\n\n"

    if not args.test:
        output_file = open_file(output_path, privs)
        write_to_file(output_file, contents)
        close_file(output_file)
    else:
        logging.info('Test Mode: Would have written to: {0}', output_path)
        logging.info('It content would be:')
        print(contents)
Beispiel #16
0
 def generate_self_signed_certificate(self):
     """
     Announce certificate generation and verify that `openssl` can be found.

     The visible body only runs `which openssl`; if that fails for any
     reason an installation hint is logged and the process exits with
     status 1.
     """
     log.info("Generating self-signed certificate for REST-API")
     try:
         # Only the success of the lookup matters; its output is not used.
         subprocess.check_output("which openssl", shell=True)
     except Exception:
         log.error("""openssl program is not available. Install it!
 For example by running on Ubuntu:
      sudo apt-get install openssl""")
         exit(1)
Beispiel #17
0
 def run(self):
     """
     Consume samples from `self._queue` until a stop has been signalled.

     For every sample taken, `signal_sample_taken` is called with the
     instance's lock and sample holder, the sample is logged together with
     `self._index`, and `self.launch_and_wait` is run on it.
     """
     # Truthiness test instead of `== False`.
     # NOTE(review): assumes `_stop_signalled` is always a bool -- confirm.
     while not self._stop_signalled:
         try:
             sample = self._queue.get_nowait()
             signal_sample_taken(self._lock, self._sample_holder)
             logging.info(f" #{self._index}: '{sample}'")
             self.launch_and_wait(sample)
         except queue.Empty:
             # Nothing queued right now: yield to other threads and poll again.
             time.sleep(0)
Beispiel #18
0
def getUserFeatureSet():
    """
    Load the query, description and title id sets from TMP_DATA_DIR_PATH.

    Each of 'queryID.set', 'descID.set' and 'titleID.set' is read line by
    line; every line is stripped and added to the corresponding set.

    :return: the tuple (query_set, desc_set, title_set).
    """
    logging.info('=========start getUserFeatureSet processing=========')

    def read_id_set(name):
        # The context manager closes the handle as soon as the set is built.
        with open(TMP_DATA_DIR_PATH + name) as id_file:
            return set(line.strip() for line in id_file)

    query_set = read_id_set('queryID.set')
    desc_set = read_id_set('descID.set')
    title_set = read_id_set('titleID.set')
    return query_set, desc_set, title_set
def getUserFeatureSet():
    """
    Load the query, description and title id sets from TMP_DATA_DIR_PATH.

    Each of 'queryID.set', 'descID.set' and 'titleID.set' is read line by
    line; every line is stripped and added to the corresponding set.

    :return: the tuple (query_set, desc_set, title_set).
    """
    logging.info('=========start getUserFeatureSet processing=========')

    def read_id_set(name):
        # The context manager closes the handle as soon as the set is built.
        with open(TMP_DATA_DIR_PATH + name) as id_file:
            return set(line.strip() for line in id_file)

    query_set = read_id_set('queryID.set')
    desc_set = read_id_set('descID.set')
    title_set = read_id_set('titleID.set')
    return query_set, desc_set, title_set
Beispiel #20
0
def load(file: str) -> bool:
    """
    Load the JSON document at `file` into the module-level `settings`.

    :param file: path of the JSON file to read.
    :return: True when the file was read and parsed; False when any
        exception occurred, in which case the exception text is logged.
    """
    global settings
    try:
        with open(file, "r") as handle:
            settings = json.load(handle)
    except Exception as err:
        logging.info(f"Trying {file}.....Failed!")
        logging.error(str(err))
        return False
    else:
        logging.info(f"Trying {file}.....Success!")
        return True
Beispiel #21
0
def new_task_parsed(args):
    """
    Handles the appropriate execution of a 'New Task' mode request
    given the provided command line arguments.

    One task is posted to '/scheduled_tasks' per entry of the comma-separated
    `args.nodes` value. When both `args.time_start` and `args.time_end` are
    given, the start time of each task is drawn with
    `calculate_random_start_time`; otherwise `args.start_time` is used as is.
    """
    if not (args.resource and
            args.appkernel and
            args.nodes):
        parser.error(
            'Please provide a resource, application and node count.')
        exit(1)
    resource = args.resource
    app = args.appkernel
    time_to_start = args.start_time
    time_start = args.time_start
    time_end = args.time_end
    repeat_in = args.periodicity
    # Split unconditionally: the previous `list(nodes)` fallback broke a
    # comma-free value such as "16" into the separate entries "1" and "6".
    node_list = [node.strip() for node in args.nodes.split(',')]

    for node in node_list:
        if time_start is not None and time_end is not None:
            time_to_start = calculate_random_start_time(
                args.start_time,
                repeat_in,
                time_start,
                time_end)
        data = {
            'resource': resource,
            'app': app,
            'time_to_start': time_to_start,
            'repeat_in': repeat_in,
            'resource_param': "{'nnodes':%s}" % (node,)
        }
        try:
            result = akrrrestclient.post(
                '/scheduled_tasks',
                data=data)
            if result.status_code == 200:
                log.info('Successfully submitted new task')
            else:
                log.error(
                    'something went wrong. {0}:{1}',
                    result.status_code,
                    result.text)
        except StandardError as e:
            log.error('''
            An error occured while communicating
            with the REST API.
            {0}: {1}
            ''',
                      e.args[0] if len(e.args) > 0 else '',
                      e.args[1] if len(e.args) > 1 else '')
Beispiel #22
0
def wall_time_parsed(args):
    """
    Handles a 'Wall Time' mode request given the provided command line
    arguments.

    With `args.list` set, the walltime records of '/walltime/<resource>/<app>'
    are queried (GET); otherwise one update (POST) is sent per entry of the
    comma-separated `args.nodes` value. Errors raised while talking to the
    REST API are logged together with a traceback, not propagated.
    """

    if not args.list and not (args.resource and
                              args.appkernel and
                              args.nodes and
                              args.walltime):
        parser.error(
            'Please provide a resource, app, node count and wall time.')
        exit(1)

    listing = args.list
    resource = args.resource
    app = args.appkernel
    walltime = args.walltime
    comments = args.comments

    if args.nodes:
        # Split unconditionally: the previous `list(nodes)` fallback broke a
        # comma-free value such as "16" into the separate entries "1" and "6".
        node_list = [node.strip() for node in args.nodes.split(',')]
    else:
        # Listing does not require a node count; issue a single request with
        # empty resource parameters instead of failing on a missing value.
        node_list = [None]

    for node in node_list:
        nnodes = int(node) if node else None
        data = {
            'resource_params': "{'nnodes':%d}" % (nnodes,) if nnodes is not None else "{}",
            'app_param': '{}',
            'walltime': walltime,
            'comments': comments
        }
        try:
            url = '/walltime/%s/%s' % (resource, app)
            if listing:
                result = akrrrestclient.get(url, data=data)
            else:
                result = akrrrestclient.post(url, data=data)

            if result.status_code == 200:
                if not listing:
                    # The node count is formatted with %s: the value comes
                    # from the command line and may legitimately be absent.
                    log.info('Successfully updated wall time (resource %s: application kernel: %s nodes: %s).' % (resource, app, nnodes))
                else:
                    log.info(
                        'Successfully queried walltime records. \n{0}',
                        result.text)
            else:
                log.error('something went wrong. {0}:{1}',
                          result.status_code,
                          result.text)
        except StandardError as e:
            import traceback
            log.error('''
            An error occured while communicating
            with the REST API.
            {0}: {1}
            '''.strip(),
                      e.args[0] if len(e.args) > 0 else '',
                      e.args[1] if len(e.args) > 1 else '')
            # print_exc() writes the traceback itself and returns None, so
            # its result must not be printed.
            traceback.print_exc()
Beispiel #23
0
def Evaluation():
    """
    Run the CTR-distribution evaluation.

    All six file names handed to `ctrDistribution` are located directly
    below TMP_DATA_DIR_PATH.
    """
    logging.info('===Evaluation Processing===')

    def in_tmp(name):
        # Prefix a file name with the temporary data directory.
        return TMP_DATA_DIR_PATH + name

    # calculate ctr distribution
    ctrDistribution(
        in_tmp('finalData4SVMRanking.dat'),   # fn_SVMRanking
        in_tmp('SVMRanking.prediction'),      # fn_rankingResult
        in_tmp('userid4SVMRanking.dat'),      # fn_userID4SVMRanking
        in_tmp('adid2Idx.dict'),              # fn_adId2Idx
        in_tmp('ad2userStatus.dict'),         # fn_ad2userStatus
        in_tmp("ad2userCTR.dict"),            # fn_out_ad2userCTR
    )
Beispiel #24
0
def batch_job_parsed(args):
    """
    Handles a 'Batch Job' mode request given the provided command line
    arguments.

    A task handler is created for the first entry of `args.nodes`; it either
    only generates the batch job script (`args.print_only`) or creates it
    without submitting it to the queue. Unless `args.verbose` is set, stdout
    and stderr are captured while the handler runs and are only printed when
    the handler reports an error.
    """
    if not (args.resource and
            args.appkernel and
            args.nodes):
        parser.error(
            'Please provide a resource, application kernel and node count.')
        exit(1)
    resource = akrr.FindResourceByName(args.resource)
    app = akrr.FindAppByName(args.appkernel)
    nodes = args.nodes
    node_list = [node.strip() for node in nodes.split(',')] if ',' in nodes else [int(nodes)]
    print_only = args.print_only
    verbose = args.verbose

    str_io = cStringIO.StringIO()
    if not verbose:
        sys.stdout = sys.stderr = str_io
    try:
        from akrrtaskappker import akrrTaskHandlerAppKer
        # NOTE(review): the resource parameters below carry hard-coded test
        # values ('SlUrM', 'slurm'); the plain form would be
        # "{'nnodes':%s}" % (node_list[0],) -- confirm which one is intended.
        taskHandler = akrrTaskHandlerAppKer(1, resource['name'], app['name'], "{'SlUrM':'--foo','slurm':'#SBATCH --mail=maya','nnodes':%s}" % (node_list[0],), "{}", "{}")
        if print_only:
            taskHandler.GenerateBatchJobScript()
        else:
            taskHandler.CreateBatchJobScriptAndSubmitIt(doNotSubmitToQueue=True)
    finally:
        # Always restore the real streams, even when the handler raises;
        # otherwise all later output would silently go into `str_io`.
        sys.stdout = sys.__stdout__
        sys.stderr = sys.__stderr__

    if taskHandler.status.count("ERROR") > 0:
        log.error('Batch job script was not generated see log below!')
        print(str_io.getvalue())
        log.error('Batch job script was not generated see log above!')

    jobScriptFullPath = os.path.join(taskHandler.taskDir, "jobfiles", taskHandler.JobScriptName)
    if os.path.isfile(jobScriptFullPath):
        with open(jobScriptFullPath, "r") as fin:
            jobScriptContent = fin.read()

        if print_only:
            log.info('Below is content of generated batch job script:')
            print(jobScriptContent)
        else:
            log.info("Local copy of batch job script is " + jobScriptFullPath)
            print('')
            log.info("Application kernel working directory on " + resource['name'] + " is " + taskHandler.remoteTaskDir)
            log.info("Batch job script location on " + resource['name'] + " is " + os.path.join(taskHandler.remoteTaskDir, taskHandler.JobScriptName))
    else:
        log.error('Batch job script was not generated see messages above!')
    if print_only:
        log.info('Removing generated files from file-system as only batch job script printing was requested')
        taskHandler.DeleteLocalFolder()
Beispiel #25
0
 def load_road():
     """
     Build the road from the current map and log its details.

     The built road is stored on `self.road`. Its `source` is always
     logged; `description`, `asciiart`, `param_descriptions` and `params`
     are logged only when non-empty.
     """
     builder = RoadBuilder()
     self.road = builder.load_and_build(
         self._get_map(), self._get_map_parameters())
     road = self.road
     logging.info(f"road: {road.source}")
     if len(road.description):
         logging.info(" " + road.description)
     if len(road.asciiart):
         logging.info(" " + road.asciiart)
     # The two parameter attributes are converted with str() before logging.
     if len(road.param_descriptions):
         logging.info(" " + str(road.param_descriptions))
     if len(road.params):
         logging.info(" " + str(road.params))
Beispiel #26
0
def Evaluation():
    """
    Run the CTR-distribution evaluation.

    All six file names handed to `ctrDistribution` are located directly
    below TMP_DATA_DIR_PATH.
    """
    logging.info('===Evaluation Processing===')

    tmp = TMP_DATA_DIR_PATH
    ranking_input = tmp + 'finalData4SVMRanking.dat'
    ranking_result = tmp + 'SVMRanking.prediction'
    ranking_user_ids = tmp + 'userid4SVMRanking.dat'
    ad_id_index = tmp + 'adid2Idx.dict'
    ad_user_status = tmp + 'ad2userStatus.dict'
    ctr_output = tmp + "ad2userCTR.dict"

    # calculate ctr distribution
    ctrDistribution(ranking_input, ranking_result, ranking_user_ids,
                    ad_id_index, ad_user_status, ctr_output)
Beispiel #27
0
def update_publish_request_count():
    """
    Refresh the module-level `request_count` every 6 seconds, forever.

    The count is fetched with `get_request_count(minutes=5)`; every tenth
    refresh the current value is both logged and printed.
    """
    global request_count
    polls = 0
    while True:
        polls += 1
        request_count = publish.cryptowatch.request_count.get_request_count(
            minutes=5)
        if polls % 10 == 0:
            report = 'publish_request_count: {request_count}'.format(
                request_count=request_count)
            logging.info(report)
            print(report)
        time.sleep(6)
Beispiel #28
0
def run(cfg, forcerun):
    """
    Run the daily download / ingest / upload pipeline on schedule.

    The loop re-checks every 10 minutes. A day that has already been
    uploaded is skipped; otherwise the pipeline starts once the current
    time in the configured time zone has passed the configured start time.

    :param cfg: configuration object handed to the config, logging and
        pipeline helpers.
    :param forcerun: when truthy, skip both the already-uploaded check and
        the schedule check, run the pipeline once, and return.
    """
    tz = config.get_tz(cfg)
    wait_seconds = 10 * 60

    while True:
        dt_str = str(util.time.get_utcnow().astimezone(tz).date())
        logging.info(cfg, 'checking if run for {dt_str} should be done'.format(dt_str=dt_str))
        if not forcerun and upload.daily.history.did_upload_today():
            logging.info(cfg, 'run for {dt_str} is already done'.format(dt_str=dt_str))
            time.sleep(wait_seconds)
            continue

        # Wait until the scheduled start time has passed (no wait when forced).
        t_run_after = config.get_daily_ingestion_start_t(cfg)
        while True:
            t_cur = util.time.get_utcnow().astimezone(tz).time()
            logging.info(cfg, 'checking if the schedule time for {dt_str} has reached'.format(dt_str=dt_str))
            if forcerun or t_cur > t_run_after:
                break

            logging.info(cfg, 'schedule time {t_run_after} not yet reached at {t_cur}'.format(t_run_after=t_run_after, t_cur=t_cur))
            time.sleep(wait_seconds)

        run_download(cfg)
        run_ingests_append_combine()
        run_upload(cfg)
        upload.daily.history.on_upload()

        if forcerun:
            # forcerun runs only once
            break
Beispiel #29
0
 def init_mysql_dbs(self):
     """
     Create the AKRR databases and grant the required privileges.

     Using the SQL root credentials stored on the instance, this creates
     `mod_akrr` and `mod_appkernel` when they are missing, grants the AKRR
     user all privileges on both, grants the XDMoD user SELECT on
     `modw.resourcefact`, flushes privileges and commits. Every grant is
     issued for the hosts '%', 'localhost' and the server's own hostname.
     Any exception is logged and terminates the process with status 1.
     """
     def drain(cursor):
         # Step through any remaining result sets of the last statement(s).
         while cursor.nextset() is not None:
             pass

     try:
         log.info("Creating AKRR databases and granting permissions for AKRR user.")

         db_root, cur_root = self.get_db(user=self.sql_root_name, password=self.sql_root_password)
         cur_root.execute("SHOW DATABASES")
         existing_dbs = [row['Database'] for row in cur_root.fetchall()]

         cur_root.execute("SELECT @@hostname")
         hostname = cur_root.fetchall()[0]['@@hostname']

         # ENSURE: That the `mod_akrr` and `mod_appkernel` databases are created.
         for db_name, create_sql in (
                 ('mod_akrr', "CREATE DATABASE IF NOT EXISTS mod_akrr"),
                 ('mod_appkernel', "CREATE DATABASE IF NOT EXISTS mod_appkernel")):
             if db_name not in existing_dbs:
                 cur_root.execute(create_sql)
                 drain(cur_root)

         def grant(statement, user, password):
             # Issue one grant per host, in a fixed order, then drain.
             for host in ('%', 'localhost', hostname):
                 cur_root.execute(statement, (user, host, password))
             drain(cur_root)

         # ENSURE: That the user that will be used by AKRR is created with the correct privileges.
         grant("GRANT ALL ON mod_akrr.* TO %s@%s IDENTIFIED BY %s",
               self.akrr_user_name, self.akrr_user_password)
         # ENSURE: That the AKRR user has the correct privileges to the `mod_appkernel` database.
         grant("GRANT ALL ON mod_appkernel.* TO %s@%s IDENTIFIED BY %s",
               self.akrr_user_name, self.akrr_user_password)
         # ENSURE: That the AKRR modw user is created w/ the correct privileges
         grant("GRANT SELECT ON modw.resourcefact TO %s@%s IDENTIFIED BY %s",
               self.xd_user_name, self.xd_user_password)

         # ENSURE: That the newly granted privileges are flushed into active service.
         cur_root.execute("FLUSH PRIVILEGES")
         drain(cur_root)
         db_root.commit()
     except Exception as e:
         log.error("Can not execute the sql install script: "+str(e))
         exit(1)
Beispiel #30
0
def dec(filename):
    """
    Prompt for the shared key and decrypt `filename`.

    The key is read as an integer from standard input and rendered as a
    zero-padded binary string of at least 16 digits. The result of the final
    XOR step is handed to `file_converter_dec`.

    :param filename: name passed to `sbox_dec` as the input to decrypt.
    """
    key_number = int(input("Input the key shared to you\n"))
    key_bits = '{0:016b}'.format(int(hex(key_number),16))
    logging.info("Key recieved")
    logging.info("Decryption has started...")
    inv_sbox_bits = sbox_dec(filename)
    shifted_bits = SHIFTRIGHT(inv_sbox_bits)
    bits = decXOR(shifted_bits, key_bits)
    # NOTE(review): each round below recomputes from the first-round
    # intermediates (`inv_sbox_bits`, `shifted_bits`) instead of chaining on
    # `bits`, so the 16 rounds leave the result equal to the first round.
    # `enc` has the mirror-image pattern; any fix must change both together.
    for _round in tqdm(range(16)):
        sbox_dec_rounds(bits)          # result is overwritten below
        SHIFTRIGHT(inv_sbox_bits)      # result is overwritten below
        bits = decXOR(shifted_bits, key_bits)
    file_converter_dec(bits)
Beispiel #31
0
def enc(filename):
    """Encrypt ``filename`` with a freshly created key.

    The key from ``create_key`` is printed so it can be shared, then rendered
    as a zero-padded 16-digit binary string and used by the XOR, left shift
    and S-box helpers. The final bit list is handed to ``file_converter_enc``.
    """
    key = create_key()
    print('Share this Key %s with the message reciever' % (key))
    key_bits = format(int(hex(key), 16), '016b')
    logging.info("Key created")
    logging.info("Encryption has started...")
    xored = encXOR(filename, key_bits)
    shifted = SHIFTLEFT(xored)
    bits = sbox_enc(shifted)
    # NOTE(review): each round overwrites `bits` with values derived from the
    # pre-loop `xored` / `shifted`, so the loop does not chain rounds and the
    # final value equals the pre-loop one. `dec` mirrors this; confirm intent
    # before changing either side.
    for _ in tqdm(range(16)):
        bits = encXORrounds(bits, key_bits)
        bits = SHIFTLEFT(xored)
        bits = sbox_enc(shifted)
    file_converter_enc(bits)
Beispiel #32
0
    def read_akrr_creds(self):
        """Interactively ask for the AKRR database user name (and password).

        The answer to the user-name prompt is stored in ``self.akrr_user_name``;
        an empty answer selects ``self.default_akrr_user``.
        """
        log.info("Before Installation continues we need to setup the database.")

        log.input("Please specify a database user for AKRR (This user will be created if it does not already exist):")
        self.akrr_user_name=raw_input('[{0}] '.format(self.default_akrr_user)) 
        # An empty reply means "accept the default shown in the prompt".
        if self.akrr_user_name=='':self.akrr_user_name=self.default_akrr_user
        
        # NOTE(review): the loop body below looks mangled by a credential
        # scrubber (the "******" runs replace the original statements), so it
        # is not valid Python as shown. Judging by the prompt texts it
        # presumably read the password twice and retried on mismatch --
        # restore from the upstream source before relying on it.
        while True:
            log.input("Please specify a password for the AKRR database user:"******"Please reenter password:"******"Entered passwords do not match. Please try again.")
Beispiel #33
0
 def read_modw_creds(self):
     """Interactively ask for the XDMoD (modw) database user (and password).

     The answer is stored in ``self.xd_user_name``; an empty answer selects
     ``self.default_akrr_user``. When the chosen user equals
     ``self.akrr_user_name`` the AKRR password is reused without prompting.
     """
     log.input("Please specify the user that will be connecting to the XDMoD database (modw):")
     # The default offered here is the AKRR default user, not a modw-specific one.
     self.xd_user_name=raw_input('[{0}] '.format(self.default_akrr_user))
     if self.xd_user_name=='':self.xd_user_name=self.default_akrr_user
     if self.xd_user_name==self.akrr_user_name:
         log.info("Same user as for AKRR database user, will set same password")
         self.xd_user_password=self.akrr_user_password
     else:
         # NOTE(review): the loop body below looks mangled by a credential
         # scrubber (the "******" runs replace the original statements), so
         # it is not valid Python as shown. Presumably it read the password
         # twice and retried on mismatch -- restore from the upstream source.
         while True:
             log.input("Please specify the password:"******"Please reenter password:"******"Entered passwords do not match. Please try again.")
Beispiel #34
0
def list_parsed(args):
    """
    Run a 'List' mode request described by the parsed command line ``args``.

    Reads ``verbose``, ``resource``, ``application`` and ``status`` from
    ``args``. When ``status`` is falsy the scheduled tasks are fetched with
    ``retrieve_tasks`` and logged one per line; when it is truthy nothing is
    fetched.

    :param args: parsed command line arguments.
    :return: None
    """
    chatty = args.verbose
    resource_filter = args.resource
    application_filter = args.application
    status_requested = args.status

    if chatty:
        log.info("Attempting to complete the requested Operation: ")

    # The status branch performs no work, so only the listing path is handled.
    if not status_requested:
        tasks = retrieve_tasks(resource_filter, application_filter)
        if not tasks:
            log.warning('No records returned.')
        else:
            log.info('Retrieved the following: ')
            for task in tasks:
                log.info("[{:<8}] Resource: {:<15} App:{:<24}",
                         task['task_id'], task['resource'], task['app'])

    if chatty:
        log.info("Requested Operation Completed")
Beispiel #35
0
def retrieve_tasks(resource, application):
    """
    Request the currently scheduled tasks ( resource / application pairings )
    from the REST API endpoint '/scheduled_tasks'.

    :type resource str
    :type application str

    :param resource: filter the results by the provided resource
    :param application: filter the results by the provided application
    :return: the object returned by ``akrrrestclient.get`` (whatever its status
             code), or None (implicitly) when the request raises StandardError.
    """
    # Both filters are sent to the endpoint as request data.
    data = {
        'application': application,
        'resource': resource
    }

    # A token failure is only logged; the request below is still attempted.
    try:
        akrrrestclient.get_token()
    except StandardError:
        log.error('''
                An error occured while attempting to retrieve a token
                from the REST API.
                ''')
    try:
        result = akrrrestclient.get(
            '/scheduled_tasks',
            data=data)
        if result.status_code == 200:
            log.info('Successfully Completed Task Retrieval.\n{0}', result.text)

        else:
            log.error(
                'something went wrong. {0}:{1}',
                result.status_code,
                result.text)
        # The result is returned for non-200 responses as well.
        return result
    except StandardError, e:
        # e.args may hold fewer than two items, hence the length guards.
        log.error('''
                An error occured while communicating
                with the REST API.
                {0}: {1}
                ''',
                  e.args[0] if len(e.args) > 0 else '',
                  e.args[1] if len(e.args) > 1 else '')
Beispiel #36
0
def sbox_dec(filename):
    """Read ``filename`` and apply the inverse S-box to every character.

    Each character's code point is rendered as a 16-digit binary string,
    split into four parts by ``string_split``, and the two part pairs are
    used as (row, column) indices into ``InvSbox``. The two looked-up values
    are rendered as 8-digit binary strings and concatenated.

    :param filename: path of the UTF-8 text file to read.
    :return: list of binary strings, one per character of the file.
    """
    logging.info("Reading file contents...")
    with open(filename, 'r', encoding='utf-8') as source:
        characters = [char for row in source for char in row]

    words = ['{0:016b}'.format(int(ord(char))) for char in characters]

    substituted = []
    for word in words:
        part0, part1, part2, part3 = string_split(word)
        first_half = '{0:08b}'.format(int(InvSbox[int(part0, 2)][int(part1, 2)]))
        last_half = '{0:08b}'.format(int(InvSbox[int(part2, 2)][int(part3, 2)]))
        substituted.append(first_half + last_half)
    return substituted
Beispiel #37
0
 def generate_settings_file(self):
     """Render the AKRR settings template and write it to cfg/akrr.inp.py.

     The template 'setup/scripts/akrr.src.inp.py' under ``akrr_home`` is
     filled in with the database credentials stored on ``self`` and two
     passwords obtained from ``self.get_random_password()``.
     """
     log.info("Generating Settings File...")
     template_path = os.path.join(akrr_home,'setup','scripts','akrr.src.inp.py')
     with open(template_path,'r') as template_file:
         template_text = template_file.read()

     # Generated in this order: read-write password first, then read-only.
     rw_password = self.get_random_password()
     ro_password = self.get_random_password()
     substitutions = dict(
         akrr_user_name=self.akrr_user_name,
         akrr_user_password=self.akrr_user_password,
         xd_user_name=self.xd_user_name,
         xd_user_password=self.xd_user_password,
         restapi_rw_password=rw_password,
         restapi_ro_password=ro_password,
     )
     rendered = template_text.format(**substitutions)

     settings_path = os.path.join(akrr_home,'cfg','akrr.inp.py')
     with open(settings_path,'w') as settings_file:
         settings_file.write(rendered)
     log.info("Settings written to: {0}".format(settings_path))
def socket_entry_handler(syscall_id, syscall_object, pid):
    """Replay Always
    Checks:
    0: The domain of the socket
    Sets:
    return value: file descriptor of the new socket -1 (error)
        (added as replay file descriptor)
    errno

    Raises ReplayDeltaError when the protocol family seen in the execution
    differs from the one recorded in the trace.

    Not Implemented:
    * Determine what is not implemented
    """
    logging.debug('Entering socket subcall entry handler')

    params = extract_socketcall_parameters(pid,
                                           cint.peek_register(pid, cint.ECX),
                                           3)
    traced_family = str(syscall_object.args[0])
    # Only PF_INET and PF_LOCAL socket calls are handled
    execution_is_PF_INET = params[0] == cint.PF_INET
    trace_is_PF_INET = traced_family == '[\'PF_INET\']'
    execution_is_PF_LOCAL = params[0] == 1  # define PF_LOCAL 1
    trace_is_PF_LOCAL = traced_family == '[\'PF_LOCAL\']'
    logging.debug('Execution is PF_INET: %s', execution_is_PF_INET)
    logging.debug('Trace is PF_INET: %s', trace_is_PF_INET)
    logging.debug('Exeuction is PF_LOCAL: %s', execution_is_PF_LOCAL)
    logging.debug('Trace is PF_LOCAL: %s', trace_is_PF_LOCAL)

    # Execution and trace must agree on both families.
    family_pairs = ((execution_is_PF_INET, trace_is_PF_INET),
                    (execution_is_PF_LOCAL, trace_is_PF_LOCAL))
    for in_execution, in_trace in family_pairs:
        if in_execution != in_trace:
            raise ReplayDeltaError(
                'Encountered socket subcall with mismatch between '
                'execution protocol family and trace protocol family')

    # Decide if we want to deal with this socket call or not
    handled = any((trace_is_PF_INET, execution_is_PF_INET,
                   trace_is_PF_LOCAL, execution_is_PF_LOCAL))
    if not handled:
        logging.info('Ignoring non-PF_INET call to socket')
        return
    noop_current_syscall(pid)
    fd = int(syscall_object.ret[0])
    logging.debug('File Descriptor from trace: %s', fd)
    apply_return_conditions(pid, syscall_object)
Beispiel #39
0
def generate_resource_config(resource_id, resource_name, queuing_system):
    """Create the resource configuration file and register the resource.

    Writes 'resource.inp.py' under ``resources_dir/<resource_name>`` from the
    queue template selected by ``queuing_system`` ('slurm' or 'pbs'), stores
    its absolute path in the module-level ``resource_cfg_filename``, and --
    unless ``args.test`` is set -- inserts the resource into the ``resource``
    table (via ``akrr.getAKDB``) and the ``resources`` table (via
    ``akrr.getDB``) when no row with that name exists yet.

    :param resource_id: stored as xdmod_resource_id in both tables.
    :param resource_name: directory name and resource name / nickname.
    :param queuing_system: key into the template mapping ('slurm' or 'pbs').
    """
    logging.info("Initiating %s at AKRR"%(resource_name,))
    
    # Both templates are loaded up front, although only one is used below.
    slurm_template_contents = retrieve_queue_template(os.path.join(akrr.curdir, 'templates', 'template.{0}.inp.py'), 'slurm')
    pbs_template_contents = retrieve_queue_template(os.path.join(akrr.curdir, 'templates', 'template.{0}.inp.py'), 'pbs')

    queues = {'slurm': slurm_template_contents, 'pbs': pbs_template_contents}
    

    # The resource directory is only created outside of test mode.
    if not args.test:
        os.mkdir(os.path.join(resources_dir, resource_name),0700)
    
    file_path = os.path.abspath(os.path.join(resources_dir, resource_name, 'resource.inp.py'))
    global resource_cfg_filename
    resource_cfg_filename=file_path
    
    # NOTE(review): the same template is passed for both the 2nd and 3rd
    # argument -- confirm against create_resource_template's signature.
    create_resource_template(file_path, queues[queuing_system], queues[queuing_system])
        
    if not args.test:    
        #add entry to mod_appkernel.resource
        dbAK,curAK=akrr.getAKDB(True)
            
        curAK.execute('''SELECT * FROM resource WHERE nickname=%s''', (resource_name,))
        resource_in_AKDB = curAK.fetchall()
        if len(resource_in_AKDB)==0:
            curAK.execute('''INSERT INTO resource (resource,nickname,description,enabled,visible,xdmod_resource_id)
                        VALUES(%s,%s,%s,0,0,%s);''',
                        (resource_name,resource_name,resource_name,resource_id))
            dbAK.commit()
        # Re-read the row to obtain its resource_id, whether it was just
        # inserted or already present.
        curAK.execute('''SELECT * FROM resource WHERE nickname=%s''', (resource_name,))
        resource_in_AKDB = curAK.fetchall()
        resource_id_in_AKDB=resource_in_AKDB[0]['resource_id']
        #add entry to mod_akrr.resource
        db,cur=akrr.getDB(True)
            
        cur.execute('''SELECT * FROM resources WHERE name=%s''', (resource_name,))
        resource_in_DB = cur.fetchall()
        if len(resource_in_DB)==0:
            cur.execute('''INSERT INTO resources (id,xdmod_resource_id,name,enabled)
                        VALUES(%s,%s,%s,%s);''',
                        (resource_id_in_AKDB,resource_id,resource_name,0))
            db.commit()

            # NOTE(review): this message is only logged when a new row was
            # inserted above; confirm whether it was meant to be unconditional.
            logging.info("Resource configuration is in "+file_path)
Beispiel #40
0
def close_file(file_handle):
    """
    Close the provided file_handle. If an error is encountered then a message will be logged.

    Falsy values and objects that are not ``file`` instances are ignored.

    :type file_handle file

    :param file_handle: the file to be closed.
    :return: void
    """
    # Nothing to do for a missing handle or a non-file object.
    if not file_handle or not isinstance(file_handle, file):
        return

    try:
        if args.verbose:
            logging.info('Attempting to close the file {0}', file_handle.name)

        file_handle.close()

        if args.verbose:
            logging.info('Successfully closed the file {0}', file_handle.name)
    except IOError as e:
        # An IOError may carry fewer than two args (e.g. IOError('message')),
        # so guard the indexing instead of raising IndexError from the handler.
        logging.error('There was an error encountered while closing {0}. {1}: {2}',
                      file_handle.name,
                      e.args[0] if len(e.args) > 0 else '',
                      e.args[1] if len(e.args) > 1 else '')
Beispiel #41
0
def featureEngineering():
    """Build the LDA topic features for the test users and join them for ranking.

    Steps, in order:
      1. expand the aggregated user feature ids into tokens,
      2. write the query column of every expanded row to a temporary corpus,
      3. run LDA over that corpus and remove the temporary file,
      4. join the resulting features with the ad/user status data.
    """
    logging.info('===Feature Engineering Processing===')
    query_set, desc_set, title_set = getUserFeatureSet()
    aggregateUserfile = TMP_DATA_DIR_PATH + 'userRawFeature_test.dict'
    expandId2TokensResultFile = TMP_DATA_DIR_PATH + 'userRawExpandTokens_test.dict'
    expandFeatureId2Tokens(aggregateUserfile, expandId2TokensResultFile, query_set, desc_set, title_set)

    corpus_path = TMP_DATA_DIR_PATH + 'tmp'
    # Context managers close both files even when a malformed row raises.
    with open(corpus_path, 'w') as corpus, open(expandId2TokensResultFile) as expanded:
        for line in expanded:
            # Rows are "\x01"-separated: user id, query, title, description.
            userid, query, title, desc = line.strip().split('\x01')
            corpus.write(query + '\n')

    # NOTE(review): the instance comes from `topicLDA.LDA` but `run` is looked
    # up on a bare `LDA` name -- confirm both refer to the same class.
    lda = topicLDA.LDA(corpus_path)
    LDA.run(lda, num_topics=200, raw_corpus=corpus_path, fn_bow=TMP_DATA_DIR_PATH+'corpus.svmlight_test', fn_out_topic=TMP_DATA_DIR_PATH+'LDA_corpus.svmlight_test')

    # Remove the temporary corpus without going through a shell; as before, a
    # failure to delete it does not abort the pipeline.
    try:
        os.remove(corpus_path)
    except OSError:
        logging.info('Could not remove temporary corpus %s' % corpus_path)

    joinResult4SVMRanking(fn_trainFeature=TMP_DATA_DIR_PATH+'corpus.svmlight_test',
                          fn_ad2userStatus=TMP_DATA_DIR_PATH+'ad2userStatus_test.dict',
                          fn_out_SVMRanking=TMP_DATA_DIR_PATH+'finalData4SVMRanking_test.dat')
Beispiel #42
0
def check_rw_db(connection_func, pre_msg, post_msg):
    """
    Check that the user has the correct privileges to the database
    at the end of the connection provided by 'connection_func'. Specifically,
    the check creates a scratch table (CREATE_ME) and then drops it again.

    :type connection_func function
    :type pre_msg str
    :type post_msg str

    :param connection_func: the function that will provide a (connection, cursor) tuple.
    :param pre_msg:         a message to be provided to the user before the checks begin.
    :param post_msg:        a message logged together with the outcome of the CREATE TABLE check.
    :return: NOTE(review): no return statement is visible in this snippet; the
             outer ``try`` below also has no handler, so the function appears
             to be cut off -- restore the remainder from the upstream source.
    """
    success = False
    log.info(pre_msg)

    try:
        connection, cursor = connection_func()

        try:
            with connection:
                # The check counts as successful when execute() reports 0.
                result = cursor.execute("CREATE TABLE CREATE_ME(`id` INT NOT NULL PRIMARY KEY, `name` VARCHAR(48));")
                success = True if result == 0 else False

                if success:
                    log.info(post_msg, success)
                else:
                    log.error(post_msg, success)

        except MySQLdb.MySQLError, e:
            log.error("Unable to create a table w/ the provided username. {0}: {1}", e.args[0], e.args[1])

        # A fresh connection is obtained for the clean-up; the DROP is
        # attempted even when the CREATE above failed.
        connection, cursor = connection_func()
        try:
            with connection:
                cursor.execute("DROP TABLE CREATE_ME;")
        except MySQLdb.MySQLError, e:
            log.error("Unable to drop the table created to check permissions. {0}: {1}", e.args[0], e.args[1])
Beispiel #43
0
def dataCleaning():
    """Derive the ad set and user set from the training sample and dump user features.

    Intermediate results are written to and re-read from files under
    ``TMP_DATA_DIR_PATH``: the top ads by click count, the ad -> users
    mapping, and the user -> ads mapping (users kept with
    ``adViewThreshold = 10``). Finally the raw features of the selected users
    are dumped to 'userRawFeature.dict'.
    """
    logging.info('===Data Cleaning Processing===')
    input_file = DATA_TRAINING_SAMPLE
    top_ads_path = TMP_DATA_DIR_PATH + 'topAdClickCnt.dict'
    ad2users_path = TMP_DATA_DIR_PATH + 'ad2UsersGivenAdSet.dict'
    user2ad_path = TMP_DATA_DIR_PATH + 'user2AdGivenAd2User.dict'

    adClickCntList = generateTopAdsUsersByClick(input_file)
    dumpList2File(adClickCntList, top_ads_path)

    adSet = set()
    with open(top_ads_path) as top_ads:
        for line in top_ads:
            # Each row is "<click count> <ad id>".
            cnt, adid = line.strip().split()
            adSet.add(adid)
    logging.debug(len(adSet))

    ad2Users = generateAd2UsersGivenAdSet(input_file, adSet)
    dumpDict2File(ad2Users, ad2users_path)
    userDict = generateUser2AdGivenAd2User(ad2users_path, adViewThreshold = 10)
    dumpDict2File(userDict, user2ad_path)

    userSet = set()
    with open(user2ad_path) as user2ad:
        for line in user2ad:
            # Each row is "<user id>\x01<ads>".
            user, ads = line.strip().split('\x01')
            userSet.add(user)
    # Logged after the set is filled; logging it beforehand always reported 0.
    logging.debug(len(userSet))

    dumpUserRawFeatureGivenUserSet(input_file, userSet, TMP_DATA_DIR_PATH + 'userRawFeature.dict')
def expandFeatureId2Tokens(aggregateUserfile, expandId2TokensResultFile, query_set, desc_set, title_set):
    """Replace the query/title/description ids of every user by their token strings.

    The id -> tokens tables are read from DATA_DESCRIPTION, DATA_QUERY and
    DATA_TITLE (tab-separated "<id>\\t<tokens>" rows), keeping only ids found
    in the matching ``*_set``. Every row of ``aggregateUserfile``
    ("<user>\\x01<query ids>\\x02<title ids>\\x02<desc ids>", ids separated by
    tabs) is rewritten to ``expandId2TokensResultFile`` as
    "<user>\\x01<query tokens>\\x01<title tokens>\\x01<desc tokens>", where the
    token strings of one column are joined with '|'.

    :raises KeyError: when a row references an id missing from its table.
    """
    logging.info('=========start expandFeatureId2Tokens processing=========')

    def load_token_map(path, wanted_ids):
        # Read one id -> tokens table, closing the file when done.
        with open(path) as source:
            return dict(line.strip().split('\t') for line in source
                        if line.split('\t', 1)[0] in wanted_ids)

    def expand(id_column, token_map):
        # Join the token strings of all non-empty ids in one column.
        return '|'.join(token_map[token_id]
                        for token_id in id_column.split('\t') if token_id != '')

    description_map = load_token_map(DATA_DESCRIPTION, desc_set)
    logging.debug('Read %s Done.' % DATA_DESCRIPTION)
    query_map = load_token_map(DATA_QUERY, query_set)
    logging.debug('Read %s Done.' % DATA_QUERY)
    title_map = load_token_map(DATA_TITLE, title_set)
    logging.debug('Read %s Done.' % DATA_TITLE)

    dump_format = '%s\x01%s\x01%s\x01%s\n'
    # Both files are closed even if a malformed row or missing id raises.
    with open(expandId2TokensResultFile, 'w') as expandId2TokensResult:
        logging.debug('start joining tokens')
        with open(aggregateUserfile) as aggregate:
            for line in aggregate:
                userID, tmp_str = line.strip().split('\x01')
                queryIDlist, titleIDlist, descIDList = tmp_str.split('\x02')
                queryExpandTokensStr = expand(queryIDlist, query_map)
                titleExpandTokensStr = expand(titleIDlist, title_map)
                descExpandTokensStr = expand(descIDList, description_map)
                expandId2TokensResult.write(dump_format % (
                    userID, queryExpandTokensStr, titleExpandTokensStr, descExpandTokensStr))
Beispiel #45
0
 def handle_results(results, dry_run):
     """Log every result row and, unless ``dry_run`` is set, insert them.

     :param results: rows providing 'id' and 'name' keys.
     :param dry_run: when truthy, only report what would be inserted.
     """
     if dry_run:
         log.info("Would have inserted the following:")
     else:
         log.info("Inserting the following:")

     for row in results:
         log.info("Id: {0:<9}Name: {1}", row['id'], row['name'])

     if not dry_run:
         insert_resources(results)
def dumpSelectedFeature(adset, seeduserDict, feature_tpl, writer):
    """Write the features of the seed users of every ad as ranking rows.

    For each ad (numbered from 1 in iteration order of ``adset``) the status
    file gives a 0/1 click label per seed user, and the feature file gives
    the feature values per user. Each user with a label yields one row
    "<label> qid:<ad number> 1:<f1> 2:<f2> ...".

    :param adset: iterable of ad ids.
    :param seeduserDict: ad id -> collection of user ids to keep.
    :param feature_tpl: '%s' template giving the feature file path of an ad.
    :param writer: open, writable file-like object; it is closed on completion.
    """
    fn_status_tpl = TMP_DATA_DIR_PATH + 'status/%s.ad2userStatus.dat'
    for i, adid in enumerate(adset):
        fn_status = fn_status_tpl % adid
        fn_feature = feature_tpl % adid
        logging.info('Handling %s \n %s' % (fn_status, fn_feature))
        allset = seeduserDict[adid]

        status = {}
        with open(fn_status) as status_file:
            for line in status_file:
                # The row's own ad id is ignored so the loop variable `adid`
                # is not overwritten.
                _row_adid, userid, click, _pv = line.strip().split()
                if userid in allset:
                    status[userid] = 1 if int(click) >= 1 else 0

        with open(fn_feature) as feature_file:
            for featureLine in feature_file:
                userid, rest = featureLine.strip().split('\t', 1)
                if userid not in status:
                    continue
                columns = ' '.join('%d:%s' % (j + 1, item)
                                   for j, item in enumerate(rest.split()))
                writer.write('%d qid:%d %s\n' % (status[userid], i + 1, columns))
    writer.close()
Beispiel #47
0
def rankingFeatureSelection():
    """Dump the ad index table and the three BM25 ranking feature files.

    The ad ids are the second column of 'topAdClickCnt.dict.final' minus a
    fixed blacklist. 'ranking/adid2idx.txt' records the 1-based number of
    every ad, and ``dumpSelectedFeature`` is run once for each of the plain,
    transfer and transfer.relevance feature templates.
    """
    # Set construction is kept exactly as-is: the iteration order of `adset`
    # determines the ad numbering written below.
    adset = set([
        row.strip().split()[1]
        for row in file(TMP_DATA_DIR_PATH + 'topAdClickCnt.dict.final')
    ])
    #adset = set(list(adset)[:2])
    blacklist = set([
        '20174985', '3834142', '3373964', '4344041', '8350700', '2878230',
        '3803920', '20174982', '4341158', '6434934', '3219148', '20035409'
    ])
    adset = adset - blacklist

    feature_dir = TMP_DATA_DIR_PATH + 'feature/'
    ranking_dir = TMP_DATA_DIR_PATH + 'ranking/'
    status_template = TMP_DATA_DIR_PATH + 'status/%s.ad2userStatus.dat'

    # (feature file template, output handle) pairs; all outputs are opened
    # up front, in this order.
    dump_jobs = [
        (feature_dir + '%s.bm25.feature',
         file(ranking_dir + 'bm25.ranking', 'w')),
        (feature_dir + '%s.bm25.feature.transfer',
         file(ranking_dir + 'bm25.ranking.transfer', 'w')),
        (feature_dir + '%s.bm25.feature.transfer.relevance',
         file(ranking_dir + 'bm25.ranking.transfer.relevance', 'w')),
    ]

    logging.info('Dumping adid2idx')
    seeduserDict = {}
    index_writer = file(ranking_dir + 'adid2idx.txt', 'w')
    for position, adid in enumerate(adset):
        index_writer.write('%s\t%d\n' % (adid, position + 1))
        relevant, non_relevant = getSeedUserSet(status_template % adid, adid)
        seeduserDict[adid] = relevant.union(non_relevant)
    index_writer.close()

    for feature_template, output in dump_jobs:
        dumpSelectedFeature(adset, seeduserDict, feature_template, output)
Beispiel #48
0
def dumpSelectedFeature(adset, seeduserDict, feature_tpl, writer):
    """Write the features of the seed users of every ad as ranking rows.

    For each ad (numbered from 1 in iteration order of ``adset``) the status
    file gives a 0/1 click label per seed user, and the feature file gives
    the feature values per user. Each user with a label yields one row
    "<label> qid:<ad number> 1:<f1> 2:<f2> ...".

    :param adset: iterable of ad ids.
    :param seeduserDict: ad id -> collection of user ids to keep.
    :param feature_tpl: '%s' template giving the feature file path of an ad.
    :param writer: open, writable file-like object; it is closed on completion.
    """
    fn_status_tpl = TMP_DATA_DIR_PATH + 'status/%s.ad2userStatus.dat'
    for i, adid in enumerate(adset):
        fn_status = fn_status_tpl % adid
        fn_feature = feature_tpl % adid
        logging.info('Handling %s \n %s' % (fn_status, fn_feature))
        allset = seeduserDict[adid]

        # Click label (1 = clicked at least once) for every seed user.
        status = {}
        with open(fn_status) as status_file:
            for line in status_file:
                # Use throwaway names so the outer `adid` is not shadowed.
                _row_adid, userid, click, _pv = line.strip().split()
                if userid not in allset:
                    continue
                status[userid] = 1 if int(click) >= 1 else 0

        with open(fn_feature) as feature_file:
            for featureLine in feature_file:
                userid, rest = featureLine.strip().split('\t', 1)
                if userid not in status:
                    continue
                features = rest.split()
                writer.write('%d qid:%d ' % (status[userid], i + 1))
                writer.write(' '.join(
                    '%d:%s' % (j + 1, item) for j, item in enumerate(features)))
                writer.write('\n')
    writer.close()
Beispiel #49
0
def check_r_db(connection_func, pre_msg, post_msg):
    """
    Check that the user has the correct privileges to the database
    at the end of the connection provided by 'connection_func'.
    Specifically checking for read permissions by selecting from
    `modw`.`resourcefact`.

    :type connection_func function
    :type pre_msg str
    :type post_msg str

    :param connection_func: the function that will provide a (connection, cursor) tuple.
    :param pre_msg:         a message to be provided to the user before the checks begin.
    :param post_msg:        a message logged together with the outcome of the check.
    :return: True if the SELECT could be executed, False otherwise
             (including when connecting or querying raised a MySQL error).
    """
    success = False
    log.info(pre_msg)

    try:
        connection, cursor = connection_func()

        try:
            with connection:
                result = cursor.execute("SELECT COUNT(*) FROM `modw`.`resourcefact`;")
                success = bool(result >= 0)

                if success:
                    log.info(post_msg, success)
                else:
                    log.error(post_msg, success)

        except MySQLdb.MySQLError as e:
            log.error("Unable to select from `modw`.`resourcefact`. {0}: {1}", e.args[0], e.args[1])

    except MySQLdb.MySQLError as e:
        log.error("Unable to connect to Database. {0}: {1}", e.args[0], e.args[1])

    # Hand the outcome back to the caller, as the documented contract promises.
    return success
Beispiel #50
0
def expandFeatureId2Tokens(aggregateUserfile, expandId2TokensResultFile,
                           query_set, desc_set, title_set):
    """Replace the query/title/description ids of every user by their token strings.

    The id -> tokens tables are read from DATA_DESCRIPTION, DATA_QUERY and
    DATA_TITLE (tab-separated "<id>\\t<tokens>" rows), keeping only ids found
    in the matching ``*_set``. Every row of ``aggregateUserfile``
    ("<user>\\x01<query ids>\\x02<title ids>\\x02<desc ids>", ids separated by
    tabs) is rewritten to ``expandId2TokensResultFile`` as
    "<user>\\x01<query tokens>\\x01<title tokens>\\x01<desc tokens>", where the
    token strings of one column are joined with '|'.

    :raises KeyError: when a row references an id missing from its table.
    """
    logging.info('=========start expandFeatureId2Tokens processing=========')

    # Each table is read inside a `with` block so its handle is closed
    # deterministically.
    with open(DATA_DESCRIPTION) as description_file:
        description_map = dict(line.strip().split('\t')
                               for line in description_file
                               if line.split('\t', 1)[0] in desc_set)
    logging.debug('Read %s Done.' % DATA_DESCRIPTION)
    with open(DATA_QUERY) as query_file:
        query_map = dict(line.strip().split('\t') for line in query_file
                         if line.split('\t', 1)[0] in query_set)
    logging.debug('Read %s Done.' % DATA_QUERY)
    with open(DATA_TITLE) as title_file:
        title_map = dict(line.strip().split('\t') for line in title_file
                         if line.split('\t', 1)[0] in title_set)
    logging.debug('Read %s Done.' % DATA_TITLE)

    dump_format = '%s\x01%s\x01%s\x01%s\n'
    # The output is closed even if a malformed row or missing id raises.
    with open(expandId2TokensResultFile, 'w') as expandId2TokensResult:
        logging.debug('start joining tokens')
        with open(aggregateUserfile) as aggregate:
            for line in aggregate:
                userID, tmp_str = line.strip().split('\x01')
                queryIDlist, titleIDlist, descIDList = tmp_str.split('\x02')
                queryExpandTokensStr = '|'.join(
                    query_map[queryId] for queryId in queryIDlist.split('\t')
                    if queryId != '')
                titleExpandTokensStr = '|'.join(
                    title_map[titleId] for titleId in titleIDlist.split('\t')
                    if titleId != '')
                descExpandTokensStr = '|'.join(
                    description_map[descId] for descId in descIDList.split('\t')
                    if descId != '')
                expandId2TokensResult.write(dump_format % (
                    userID, queryExpandTokensStr, titleExpandTokensStr,
                    descExpandTokensStr))
Beispiel #51
0
def run():
    """Supervise the `go run run_ohlc.go 4` process, restarting it forever.

    Each cycle starts the process in its own session plus two monitor
    threads that share a list used as a "process was killed" flag. The
    threads are joined alternately with a 10 second timeout; as soon as the
    flag list is non-empty the cycle ends and a new process is started.
    """
    global request_count

    while True:
        logging.info('starting run_polygon.py')
        process = subprocess.Popen(['go', 'run', 'run_ohlc.go', '4'],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   preexec_fn=os.setsid)
        started_at = datetime.datetime.now()
        # The monitor threads append to this list once they kill the process.
        killed_marker = []

        restart_monitor = threading.Thread(
            target=monitor_restart_trigger,
            args=(process, killed_marker))
        publish_monitor = threading.Thread(
            target=monitor_publish_request_count,
            args=(process, started_at, killed_marker))

        restart_monitor.start()
        publish_monitor.start()

        # (thread, message after join, message when the kill is observed)
        watch_plan = (
            (restart_monitor,
             'end of the join thread monitor_restart_trigger',
             'observed that the process was killed in monitor_restart_trigger thread.'),
            (publish_monitor,
             'end of the join thread monitor_publish_request_count',
             'observed that the process was killed in monitor_publish_request_count thread.'),
        )

        # NOTE(review): if `logging` is the standard library module, `warn`
        # is a deprecated alias of `warning`.
        process_killed = False
        while not process_killed:
            for monitor, joined_message, killed_message in watch_plan:
                monitor.join(10)
                logging.info(joined_message)
                if killed_marker:
                    logging.warn(killed_message)
                    # TODO: finish -- the other monitor thread is not cancelled.
                    process_killed = True
                    break
            else:
                logging.info(
                    'end of the join loop, process was not killed in the loop')
Beispiel #52
0
def scene_pyscenario_update(sctx):
    """Advance the scripted scenario stored in ``sctx['scene']`` by one frame.

    When ``sctx['frame']`` is 0 the scenario and the monitors named by the
    '-monitors' argument are instantiated, the road is loaded from the
    scenario's map, and the scene clock ('dt', 't', 'frame') is reset.
    Afterwards, if the scene has a scenario, the scenario and all monitors
    are stepped and the scene's frame counter is incremented.

    :param sctx: context dict providing at least 'scene' and 'frame'.
    :return: always True.
    """
    scene = sctx['scene']

    if sctx['frame'] == 0:
        scenario_name, scenario = make_scenario_instance(get_scenario_arg())

        road_source = scenario.get_map()
        road_settings = scenario.get_map_parameters()
        scene_road_update_file(sctx, road_source, road_settings)

        scenario._init(scene['coord'])
        logging.info(f"scenario: {scenario_name}")
        logging.info(" " + scenario.get_description())

        # Blank entries of the comma-separated monitor list are ignored.
        requested = [entry for entry in arg_get('-monitors', '').split(',')
                     if entry.strip()]
        monitors = []
        for pattern in requested:
            monitor_name, monitor = make_monitor_instance(pattern)
            monitor._init(scenario)
            monitors.append(monitor)
            logging.info(f"monitor: {monitor_name}")
            logging.info(" " + monitor.get_description())

        scene['scenario'] = scenario
        scene['monitors'] = monitors

        scene['dt'] = 1.0/60.0
        scene['t'] = 0.0
        scene['frame'] = 0

    if scene['scenario'] is None:
        return True

    # Simulation time is derived from the frame counter and the fixed step.
    now = scene['frame'] * scene['dt']
    scene['scenario']._step(scene['frame'], now, scene['dt'])
    for monitor in scene['monitors']:
        monitor.step(scene['frame'], now, scene['dt'])
    scene['frame'] = scene['frame'] + 1
    return True
Beispiel #53
0
def run(forcerun):
    """Run the daily ingest + upload once per day at the configured time.

    The loop checks every 30 minutes whether today's upload already
    happened. If not, it waits until the configured ingestion start time,
    then runs the ingests and the upload and records the upload.

    :param forcerun: when truthy, skip all checks and waits, run once, and
                     return.
    """
    cfg = config.load('config.us.polygon.yaml')
    tz = config.get_tz(cfg)

    while True:
        dt_str = str(util.time.get_utcnow().astimezone(tz).date())
        check_message = 'checking if run for {dt_str} should be done'.format(
            dt_str=dt_str)
        logging.info(cfg, check_message)

        if not forcerun and upload.daily.history.did_upload_today():
            done_message = 'run for {dt_str} is already done'.format(
                dt_str=dt_str)
            logging.info(cfg, done_message)
            time.sleep(30 * 60)
            continue

        t_run_after = config.get_daily_last_record_ingestion_start_t(cfg)
        while True:
            t_cur = util.time.get_now_time_tz()
            logging.info(
                cfg,
                'checking if the schedule time for {dt_str} has reached'.format(
                    dt_str=dt_str))
            dt = util.time.time_diff_seconds(t_run_after, t_cur)

            if not (forcerun or dt <= 0):
                logging.info(
                    cfg,
                    'schedule time {t_run_after} not yet reached at {t_cur}'.format(
                        t_run_after=t_run_after, t_cur=t_cur))
                # NOTE(review): max() makes every wait at least one hour even
                # when the schedule time is closer -- confirm min() was not
                # intended.
                time.sleep(max(dt + 1, 60 * 60))
                continue

            run_ingests()
            run_upload()
            upload.daily.history.on_upload()
            break

        if forcerun:
            # forcerun runs only once
            return
Beispiel #54
0
def run_download(cfg):
    """Download the US daily data into ``DEST_DIR_DAILY``, logging start and end.

    :param cfg: configuration object, passed to the logger only.
    """
    logging.info(cfg, 'downloading us daily')
    downloader = download.download
    downloader.download(downloader.DEST_DIR_DAILY)
    logging.info(cfg, 'download complete')
Beispiel #55
0
def Prediction():
    """Classify the final ranking data with the trained SVM-rank model.

    Reads 'finalData4SVMRanking.dat' and 'SVMRanking.model' from
    ``TMP_DATA_DIR_PATH`` and passes 'SVMRanking.prediction' as the
    prediction output path.
    """
    logging.info('===Prediction Processing===')
    feature_path, model_path, prediction_path = [
        TMP_DATA_DIR_PATH + file_name
        for file_name in ('finalData4SVMRanking.dat',
                          'SVMRanking.model',
                          'SVMRanking.prediction')
    ]
    SVM_RANK.svm_rank_classify(feature_path, model_path, prediction_path)
Beispiel #56
0
def Training():
    """Train the SVM-rank model on the final ranking data.

    Passes 'finalData4SVMRanking.dat' and 'SVMRanking.model' (both under
    ``TMP_DATA_DIR_PATH``) to the learner with the options ' -c 10 '.
    """
    logging.info('===Trainng Processing===')
    training_data = TMP_DATA_DIR_PATH + 'finalData4SVMRanking.dat'
    model_path = TMP_DATA_DIR_PATH + 'SVMRanking.model'
    learner_options = ' -c 10 '
    SVM_RANK.svm_rank_learn(training_data, model_path, args=learner_options)
Beispiel #57
0
def genTitleDesc(inputFile, adSet, userset=None):
    """Write, per ad, the titles and descriptions each user saw, clicked and ignored.

    Every row of ``inputFile`` is whitespace-separated; the click count is the
    first field, the ad id the fourth, and title id, description id and user
    id are the last three fields. Rows with user id '0' or an ad outside
    ``adSet`` are skipped. A title/description is attributed to the "click"
    or "non-click" profile according to the click count of the first row in
    which that user saw it for that ad.

    Ids are expanded to token strings with DATA_TITLE / DATA_DESCRIPTION, and
    one file 'user_title_desc/<ad>.user_title_desc.dat' is written per ad in
    ``adSet`` with "\\x01"-separated rows:
    ad, user, titles, clicked titles, non-clicked titles, descriptions,
    clicked descriptions, non-clicked descriptions ('|'-joined lists).

    :param inputFile: path of the training sample.
    :param adSet: ad ids to process.
    :param userset: optional collection of user ids to keep; ``None`` (the
                    default) keeps every user.
    """
    logging.info('Generating Click and Unclick Title Description For Per User')

    ad2Profile = {}
    titleSet = set()
    descSet = set()

    with open(inputFile) as samples:
        for line in samples:
            fields = line.strip().split()
            adid = fields[3]
            userid = fields[-1]
            if userid == '0' or adid not in adSet:
                continue
            # The default userset=None means "no user filter"; testing
            # membership against None would raise TypeError.
            if userset is not None and userid not in userset:
                continue
            if adid not in ad2Profile:
                ad2Profile[adid] = {
                    'profile': {},
                    'nonClick_profile': {},
                    'click_profile': {}
                }
            profile = ad2Profile[adid]['profile']
            nonClick_profile = ad2Profile[adid]['nonClick_profile']
            click_profile = ad2Profile[adid]['click_profile']
            title = fields[-3]
            desc = fields[-2]
            titleSet.add(title)
            descSet.add(desc)
            if userid not in profile:
                profile[userid] = {'desc': set(), 'title': set()}
                nonClick_profile[userid] = {'desc': set(), 'title': set()}
                click_profile[userid] = {'desc': set(), 'title': set()}

            for kind, token_id in (('title', title), ('desc', desc)):
                if token_id in profile[userid][kind]:
                    continue
                profile[userid][kind].add(token_id)
                # Only the first sighting decides click vs. non-click.
                if int(fields[0]) > 0:
                    click_profile[userid][kind].add(token_id)
                else:
                    nonClick_profile[userid][kind].add(token_id)

    expandDesc = dict()
    expandTitle = dict()

    with open(DATA_DESCRIPTION) as desc_file:
        for line in desc_file:
            tid, rest = line.strip().split()
            if tid not in descSet:
                continue
            expandDesc[tid] = rest

    with open(DATA_TITLE) as title_file:
        for line in title_file:
            tid, rest = line.strip().split()
            if tid not in titleSet:
                continue
            expandTitle[tid] = rest

    writers = {}
    try:
        # One output file per requested ad, even if no row mentioned it.
        for ad in adSet:
            writers[ad] = open(
                TMP_DATA_DIR_PATH +
                'user_title_desc/%s.user_title_desc.dat' % ad, 'w')

        for adid in ad2Profile:
            profile = ad2Profile[adid]['profile']
            nonClick_profile = ad2Profile[adid]['nonClick_profile']
            click_profile = ad2Profile[adid]['click_profile']
            for userid in profile:
                click_title = '|'.join(
                    expandTitle[key] for key in click_profile[userid]['title'])
                title = '|'.join(
                    expandTitle[key] for key in profile[userid]['title'])
                nonclick_title = '|'.join(
                    expandTitle[key] for key in nonClick_profile[userid]['title'])

                click_desc = '|'.join(
                    expandDesc[key] for key in click_profile[userid]['desc'])
                desc = '|'.join(
                    expandDesc[key] for key in profile[userid]['desc'])
                nonclick_desc = '|'.join(
                    expandDesc[key] for key in nonClick_profile[userid]['desc'])
                writers[adid].write('\x01'.join([
                    adid, userid, title, click_title, nonclick_title, desc,
                    click_desc, nonclick_desc
                ]))
                writers[adid].write('\n')
    finally:
        # Close every output that was opened, also when an error interrupts.
        for handle in writers.values():
            handle.close()
Beispiel #58
0
import util.logging as log
from database.connection import Database
import manager


# Start-up banner.
log.line("#####################")
log.line("#                   #")
log.line("#   Scuti  Server   #")
log.line("#                   #")
log.line("#####################")


# NOTE(review): `scuti`, `SimpleWebSocketServer` and `Server` are used below
# but are not imported in the visible part of this script -- confirm the
# imports exist in the full file.
scuti.clients = {}
try:
    # Connection arguments are hard-coded here.
    # NOTE(review): presumably (host, user, password, database name), which
    # would make this root with an empty password -- confirm against
    # Database's signature.
    scuti.db = Database("localhost", "root", "", "scuti")
    log.info("Connected to database!")
    # Room
    manager.room.load_rooms()
    log.info("Room manager loaded!")
    # Users
    manager.user.load_users()
    log.info("User manager loaded!")
except Exception as e:
    # Any start-up failure is logged and aborts the script.
    # NOTE(review): exit() is called without a status code, so the process
    # likely reports success even on failure -- confirm this is intended.
    log.error(str(e))
    exit()


log.line()
log.info("Server online!")
# Listens on port 3000.
# NOTE(review): presumably the empty host string binds all interfaces, and
# serveforever() presumably blocks -- confirm against the websocket library.
server = SimpleWebSocketServer("", 3000, Server)
server.serveforever()
Beispiel #59
0
def log_heartbeat(cfg):
    """Log a heartbeat message every 30 minutes; never returns.

    :param cfg: configuration object, passed to the logger only.
    """
    interval_seconds = 30 * 60
    while 1:
        logging.info(cfg, "us_finance_daily_miner: heartbeat message.")
        time.sleep(interval_seconds)
Beispiel #60
0
def run_upload(cfg):
    """Upload the US daily data, logging before and after.

    :param cfg: configuration object, passed to the logger and the uploader.
    """
    logging.info(cfg, 'uploading us daily')
    uploader = upload.daily.upload
    uploader.upload(cfg)
    logging.info(cfg, 'uplaod complete')