Ejemplo n.º 1
0
def main():
    """
    Parse the selected C file and emit a test harness for one of its functions.

    The input file is ``args.input_file`` (tried as given first, then relative
    to ``args.directory``).  When no input file is given, the first file under
    ``args.directory`` matching ``**/*main*.c`` is used.

    Any error raised while locating, parsing or generating is logged with its
    traceback and the process exits with status 1.
    """
    args = get_args()
    if args.log_level:
        log.setLevel(logging.getLevelName(args.log_level))
    else:
        log.setLevel(logging.ERROR)

    func_name = args.func_name[0] if args.func_name else None
    clang_flags = get_clang_flags(args)
    log.info(f'clang_flags={clang_flags}')

    try:
        if args.input_file:
            infile = Path(args.input_file)
            if not infile.is_file():
                infile = args.directory / args.input_file
        else:
            # A default avoids leaking a bare StopIteration when nothing matches.
            infile = next(args.directory.glob('**/*main*.c'), None)
            if infile is None:
                raise FileNotFoundError(
                    f'no file matching **/*main*.c under {args.directory}')
        # Explicit check instead of assert, which is stripped under -O.
        if not infile.is_file():
            raise FileNotFoundError(f'input file not found: {infile}')
        log.info(f'infile={infile}')
        cur = parse(infile, args=clang_flags)

        target = select_target(func_name, cur)
        test_harness = codegen(target)
        input_text = read_input_file(cur)
        output(args, input_text, test_harness)
    except Exception:
        # Exception (not a bare except) lets Ctrl-C and SystemExit pass through.
        log.exception(f'error generating test harness from {args.input_file}')
        raise SystemExit(1)
Ejemplo n.º 2
0
def main():
    """
    Generate a ``my_assert.patch`` file for every bug listed in ``notes.tsv``.

    For each row, the source directory ``functional/<Bug>/<Name>`` is copied to
    ``functional/<Bug>/buggy.<version>`` unless that directory already exists,
    a patch is produced by ``parse`` from the row's ``Assert`` column, and the
    patch is written into the buggy directory.

    When ``args.filter`` is set it is a comma-separated list of regular
    expressions; only rows whose ``Bug`` value starts with a match are kept.
    """
    df = pandas.read_csv('notes.tsv', sep='\t')
    # NOTE(review): ``args`` is not defined in this function; presumably a
    # module-level global -- confirm.
    if args.filter:
        filtered = [
            df.loc[df['Bug'].str.contains(f'^{name_regex}'), :]
            for name_regex in args.filter.split(',')
        ]
        df = pandas.concat(filtered)

    for _, row in df.iterrows():
        name = row['Name']
        bug = row['Bug']
        buggy_version = bug.split('-')[-2]
        assertion = row['Assert']

        dirname = os.path.join('functional', bug)

        # Copy to buggy folder
        src_dirname = os.path.join(dirname, name)
        buggy_dirname = os.path.join(dirname, f'buggy.{buggy_version}')
        if not os.path.isdir(buggy_dirname):
            try:
                print('copying', src_dirname, 'to', buggy_dirname)
                shutil.copytree(src_dirname, buggy_dirname)
            except Exception:
                # Fall back to the external tool; Ctrl-C is no longer swallowed.
                print('error, trying cp -r')
                subprocess.check_call(
                    args=['cp', '-r', src_dirname, buggy_dirname])

        patch = parse(src_dirname, buggy_dirname, assertion)
        assert_file = os.path.join(buggy_dirname, 'my_assert.patch')
        # The context manager guarantees the patch is flushed and closed.
        with open(assert_file, 'w') as patch_fp:
            patch_fp.write(patch)
        log.info(f'generated patch {assert_file}')
Ejemplo n.º 3
0
    def write_xml(self):
        """
        Convert every entry of ``self._update_app`` from HTML to an XML tree.

        The direct children of the HTML ``<body>`` are walked in order:
        ``h1`` sets the ``app`` attribute of the root element, ``h2`` adds a
        first-level ``node`` and ``h3`` adds a second-, third- or fourth-level
        ``node`` depending on ``self._menu_level``.  The document is written to
        ``<self._xml_path>/<key>.xml``.
        """
        for html_file in self._update_app.keys():
            self._current_app = html_file
            html_file_path = os.path.join(self._html_path,
                                          self._update_app[html_file]['name'])
            xml_file_path = os.path.join(self._xml_path, html_file + ".xml")
            # Close the input as soon as it has been parsed.
            with open(html_file_path) as html_fp:
                soup = BeautifulSoup(html_fp, "lxml")
            doc = xml.dom.minidom.Document()
            root = doc.createElement("root")  # root node

            for c in soup.body.children:
                if not c.name:
                    continue

                if c.name == "h1":  # root node
                    root.setAttribute('app', self.get_attrs(c))  # add the APP name attribute
                    doc.appendChild(root)

                if c.name == "h2":  # first-level menu
                    one_node = doc.createElement('node')
                    one_node_attrs = self.format_attrs(self.get_attrs(c))
                    for key, value in one_node_attrs.iteritems():
                        one_node.setAttribute(key, value)
                    root.appendChild(one_node)

                if c.name == "h3":
                    # NOTE(review): the result is unused, but format_attrs()
                    # presumably updates self._menu_level, which is tested
                    # right below -- confirm before removing this call.
                    self.format_attrs(self.get_attrs(c))
                    if self._menu_level == "2":  # second-level menu
                        two_node = doc.createElement('node')
                        two_node_arrts = self.format_attrs(self.get_attrs(c))
                        for key, value in two_node_arrts.iteritems():
                            two_node.setAttribute(key, value)
                        one_node.appendChild(two_node)
                    if self._menu_level == "3":  # third-level menu
                        three_node = doc.createElement('node')
                        three_node_arrts = self.format_attrs(self.get_attrs(c))
                        for key, value in three_node_arrts.iteritems():
                            three_node.setAttribute(key, value)
                        two_node.appendChild(three_node)
                    if self._menu_level == "4":  # fourth-level menu
                        four_node = doc.createElement('node')
                        four_node_arrts = self.format_attrs(self.get_attrs(c))
                        for key, value in four_node_arrts.iteritems():
                            four_node.setAttribute(key, value)
                        three_node.appendChild(four_node)
                    if self._menu_level == "5":  # fifth-level menu (not handled)
                        pass
                    if self._menu_level == "6":  # sixth-level menu (not handled)
                        pass

            # The context manager flushes and closes the output file; the
            # original left the handle open.
            with open(xml_file_path, 'w') as fp:
                doc.writexml(fp,
                             indent='\t',
                             addindent='\t',
                             newl='\n',
                             encoding="utf-8")
            log.info("write " + html_file + ".xml" + " success")
Ejemplo n.º 4
0
 def post(self):
     """
     Render ``bus_info.html`` for the requested sub-line.

     Reads the ``sublineid`` and ``latlng`` request arguments, looks up the
     line with ``bus.get_busline_info`` and passes the returned station id,
     station info and buses to the template.
     """
     subid = self.get_argument('sublineid')
     latlng = self.get_argument('latlng')
     sid, stationsinfo, buses = bus.get_busline_info(subid, latlng)
     log.info(str(buses))
     # The original also built a list of '<id>,<name>' strings that was never
     # used: the template has always received the raw station info.
     self.render("bus_info.html", items=buses, stationid=sid,
                 sublineid=subid, stations=stationsinfo)
Ejemplo n.º 5
0
    def run(self, target, target_args):
        """
        Run Pin. Collect results in temporary file pin.log
        and return a list of trace locations (filepath:lineno:column).

        An empty list is returned (after logging an error) when the target,
        the Pin executable or the pintool library is not a file.

        Raises:
            Exception: if Pin wrote an error file, or if it produced no log
                file.  The error file is left on disk so that the raised
                message, which points at it, stays accurate; it is removed at
                the start of the next run.

        ``pin.log`` is removed on exit unless ``self.keep_logfile`` is set.
        """
        if not target.is_file():
            log.error(f'No such file for target executable: {target}')
            return []

        if not self.exe.is_file():
            log.error(f'No such file for Pin executable: {self.exe}')
            return []
        if not self.lib.is_file():
            log.error(f'No such file for trace-pintool: {self.lib}')
            return []

        logfile = Path('pin.log')
        errorfile = Path('error.log')
        try:
            # Clear files if present from old executions
            for stale in (logfile, errorfile):
                if stale.is_file():
                    stale.unlink()

            # Run Pin.  The argument vector is built directly rather than by
            # splitting a formatted string, so paths with spaces survive.
            args = [
                str(self.exe),
                '-error_file', str(errorfile.absolute()),
                '-t', str(self.lib),
                '-o', str(logfile),
                '-c', '--', str(target.absolute()),
                *target_args,
            ]
            p = subprocess.Popen(args,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
            stdout, _ = p.communicate()
            return_code = p.returncode
            args_str = ' '.join(args)

            # Pin tool exits 1 on success, so use errorfile to detect errors
            log.info(
                f'Got return code {return_code} running pin with command: "{args_str}"'
            )
            if errorfile.is_file():
                log.warning(f'Echoing Pin output stream:')
                for l in stdout.decode().splitlines():
                    log.warning(f'* {l}')
                # Do not delete errorfile here: the message below refers to it.
                raise Exception(
                    f'Pin had an error while running. See {errorfile} for more information.'
                )

            if not logfile.is_file():
                raise Exception(
                    f'Something went wrong running Pin -- {logfile} is missing.'
                )
            return parse_pinlog(logfile)
        finally:
            if logfile.is_file() and not self.keep_logfile:
                logfile.unlink()
Ejemplo n.º 6
0
def proc_music(content, resp):
    """
    Turn ``resp`` into a music reply for ``content``.

    ``douban.fm_music`` is used when ``content`` is one of
    ``douban.FM_CHANNEL``, otherwise ``music.getmusics``.  The looked-up song
    is stored under ``resp['Music']`` and ``resp['MsgType']`` becomes
    ``'music'``.
    """
    log.info('proc_music|%s' , content)
    if content in douban.FM_CHANNEL:
        lookup = douban.fm_music
    else:
        lookup = music.getmusics
    title, artist, album, url = lookup(content)
    resp['MsgType'] = 'music'
    resp['Music'] = {
        'Title': title,
        'Description': artist + album,
        'MusicUrl': url,
        'HQMusicUrl': url,
    }
Ejemplo n.º 7
0
def fm_music(channel_id):
    """
    Pick a random song from the playlist of the given channel.

    Returns a list ``[title, artist, album title, url]`` whose items are
    UTF-8 encoded.

    Raises MsgException when the service answers with a non-zero ``r`` field.
    """
    url = URL_FMT % channel_id
    req = urllib2.Request(url)
    log.info(url)
    response = urllib2.urlopen(req, timeout=10)
    try:
        payload = response.read()  # utf-8
    finally:
        # Release the connection even if reading fails.
        response.close()
    data = json.loads(payload)  # unicode
    if data['r'] != 0:
        raise MsgException('系统繁忙')
    song = random.choice(data['song'])
    fields = (song['title'], song['artist'], song['albumtitle'], song['url'])
    return [field.encode('utf-8') for field in fields]
Ejemplo n.º 8
0
 def post(self):
     """
     Look up a bus line and render either an error message or the selection page.

     ``msg.html`` is rendered when the lookup did not succeed or when the
     first returned entry is marked as not open; otherwise
     ``bus_select.html`` is rendered with the returned data.
     """
     line_name = self.get_argument('busline')
     position = self.get_argument('latlng')
     result = bus.get_bus_url(line_name)
     log.info(str(result))

     if resp_failed(result) if False else result["success"] != "true":
         self.render("msg.html", msg=result['msg'])
         return

     if result['data'][0]['isopen'] == '0':
         self.render("msg.html", msg=u'%s路线暂未开通查询服务' % line_name)
         return

     self.render("bus_select.html", items=result['data'], latlng=position)
Ejemplo n.º 9
0
 def get(self):
     """
     Answer a signature verification request.

     The signature computed by ``weixin.get_signature`` from the token, the
     ``timestamp`` and the ``nonce`` is compared with the ``signature``
     argument; on a match ``echostr`` is written back, otherwise an error
     text is written and logged.
     """
     params = self.request.arguments
     timestamp = params['timestamp'][0]
     nonce = params['nonce'][0]
     echostr = params['echostr'][0]
     token = "AAAAAAAAAAAAAAAAAAHHHHHHHHHHHHH"
     expected = weixin.get_signature(token, timestamp, nonce)

     if expected != params['signature'][0]:
         self.write('signature is invalid')
         log.error('signature is invalid')
         return

     self.write(echostr)
     log.info('check succ')
Ejemplo n.º 10
0
def codegen(target):
    """
    Render the C test-harness source for the ``target`` function.

    The declarations and initializers come from ``stmtgen`` and the call
    statement from ``callgen``; all three are substituted into a fixed C
    template whose ``main`` reads its values from the command line.
    """
    params = list(target.get_arguments())
    log.info(f'target function has {len(params)} parameters')

    declarations, initializers = stmtgen(params)
    call_stmt = callgen(target, params)

    # Doubled braces are literal braces for str.format.
    harness_template = '''
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// argi is used for iterating through the input arguments
int argi = 1;
int global_argc;
char **global_argv;

char *shift_argi() {{
    int old_argi = argi;
    argi++;
    assert(old_argi < global_argc);
    char *return_value = global_argv[old_argi];
    if (strcmp(return_value, "NULL") == 0) {{
        return_value = NULL;
    }}
    return return_value;
}}

int main(int argc, char **argv) {{
global_argc = argc;
global_argv = argv;

// declarations
{declarations}

// initializers
{initializers}

// call into segment
{call}
}}
'''
    return harness_template.format(
        declarations='\n'.join(declarations),
        initializers='\n'.join(initializers),
        call=call_stmt,
    )
def do_reminder_filtering(sent_filename, exclude_filename, months, years):
    """
    Select the records that should receive a reminder email.

    The second element returned by ``do_reminder_filtering_part1`` (records
    that have not been sent a reminder) is further reduced by the unsubscribe
    list and by the exclude list.

    Returns a tuple ``(keepers, all_dupes)``: the records to contact and the
    records that were eliminated.  The first element returned by
    ``do_reminder_filtering_part1`` is counted among the eliminated ones; it
    is expected to hold the records that already got a reminder.
    """
    # Two distinct names: the original unpacked both elements into the same
    # variable and then added the records still to be reminded to all_dupes.
    (already_reminded, hasnt_been_sent_reminder) = do_reminder_filtering_part1(sent_filename, exclude_filename, months, years, "REMINDER")

    (first_occurrence3, dupes3) = filter_unsubscribe_list(hasnt_been_sent_reminder)
    log.info("ELIMINATED because unsubscribe, n=" + str(len(dupes3)))
    log.info(list_of_emails(dupes3))
    (first_occurrence4, dupes4) = filter_exclude_list(first_occurrence3, exclude_filename)
    log.info("ELIMINATED because on exclude list, n=" + str(len(dupes4)))
    log.info(list_of_emails(dupes4))
    all_dupes = already_reminded + dupes3 + dupes4
    keepers = first_occurrence4
    log.info("KEEPING these, n=" + str(len(keepers)))
    log.debug(list_of_emails(keepers))
    return(keepers, all_dupes)
Ejemplo n.º 12
0
def select_target(func_name, cur):
    """
    Pick the function declaration to generate a harness for.

    With ``func_name`` given, the first declaration with that spelling is
    returned and an ``Exception`` is raised when there is none.  Without it,
    the declaration with the greatest line number is chosen, where ``main``
    and declarations whose file name does not end in ``.c`` rank as -1.
    """
    candidates = find(cur, CursorKind.FUNCTION_DECL)

    if func_name:
        target = None
        for decl in candidates:
            if decl.spelling == func_name:
                target = decl
                break
        if target is None:
            raise Exception(f'no function named {func_name}')
    else:
        def rank(decl):
            # Only non-main functions located in a .c file compete by line.
            if decl.spelling != 'main' and decl.location.file.name.endswith('.c'):
                return decl.location.line
            return -1

        target = max(candidates, key=rank)

    log.info(f'target function: {pp(target)}')
    return target
Ejemplo n.º 13
0
def proc_event(req):
    """
    Handle an event message.

    A ``subscribe`` event is answered with a text reply (greeting followed by
    ``HELP``) serialized by ``Dict2XMLTagText``.  Every other event is only
    logged and the event name itself is returned.
    """
    event = req['Event']

    if event == 'subscribe':
        log.info('proc_event|%s|subscribe' , req['FromUserName'])
        # Sender and recipient are swapped for the reply.
        reply = {
            'ToUserName': req['FromUserName'],
            'FromUserName': req['ToUserName'],
            'CreateTime': int(time.time()),
            'MsgType': 'text',
            'Content': '欢迎使用\n'+HELP,
        }
        return Dict2XMLTagText().toxml(reply)

    if event == 'unsubscribe':
        log.info('proc_event|%s|unsubscribe' , req['FromUserName'])
    else:
        log.warning('proc_event|nuknown event %s|%s' , event ,req['FromUserName'])
    return event
Ejemplo n.º 14
0
 def _init_session(self):
     """
     Create ``self.session`` and prime it with the cookies of the talk page.

     When the response lacks the ``AWSELB`` or ``JSESSIONID`` cookie the
     session is discarded and, after a short sleep, the method calls itself
     again.
     """
     landing_url = "http://www.simsimi.com/talk.htm?lc=ch"
     self.session = requests.Session()
     self.session.headers.update(Headers)
     # Fetch the page once to obtain the cookies.
     response = self.session.get(landing_url)
     cookie_names = response.cookies.keys()

     if not ('AWSELB' in cookie_names and 'JSESSIONID' in cookie_names):
         log.warning('SimSimi _init_session fail... try again')
         time.sleep(0.1)
         self.session = None
         self._init_session()
         return

     self.session.cookies.update(response.cookies)
     self.session.cookies['sagree'] = 'true'
     self.session.cookies['selected_nc'] = 'ch'
     log.info('SimSimi _init_session succ...')
def do_reminder_filtering_part1(sent_filename, exclude_filename, months, years, reminder_string):
    """
    Split the INITIAL sent records of the given months and years by reminder status.

    Records are read with ``get_sent_fields(sent_filename)``, reduced to those
    whose ``note`` is ``INITIAL`` and whose ``data_month`` / ``year`` are in
    ``months`` / ``years``, then split by ``email_already_sent_for_reminder``.

    ``exclude_filename`` is accepted but not used here.

    Returns a tuple ``(has_been_sent_reminder_email, hasnt_been_sent_reminder)``.
    The original returned the second list in both positions, which discarded
    the already-reminded records; the second position is unchanged.
    """
    log.info("FILTERING with months=" + " ".join(months) + " and years=" + " ".join(years))
    all_initial_sent_records = get_sent_fields(sent_filename)
    log.info("STARTING with n=" + str(len(all_initial_sent_records)))

    initial_sent_records = get_filtered_dict(all_initial_sent_records, lambda k, v: k == "note" and v in ["INITIAL"])
    log.info("initial sent records, n=" + str(len(initial_sent_records)))
    articles_of_months = get_filtered_dict(initial_sent_records, lambda k, v: k == "data_month" and v in months)
    articles_of_years = get_filtered_dict(articles_of_months, lambda k, v: k == "year" and v in years)

    log.info("AFTER FILTERING for months and years, n=" + str(len(articles_of_years)))

    (has_been_sent_reminder_email, hasnt_been_sent_reminder) = email_already_sent_for_reminder(articles_of_years, sent_filename, reminder_string)
    log.info("ELIMINATED because ALREADY GOT REMINDER, n=" + str(len(has_been_sent_reminder_email)))

    return(has_been_sent_reminder_email, hasnt_been_sent_reminder)
Ejemplo n.º 16
0
def proc_location(req):
    """
    Answer a location message with a single news article linking to the bus page.

    The article URL carries the URL-quoted ``Location_X,Location_Y`` pair of
    the request as its ``latlng`` query parameter.  The reply is serialized
    once by ``Dict2XMLTagText``, logged and returned.
    """
    resp = {}
    resp['ToUserName'] = req['FromUserName']
    resp['FromUserName'] = req['ToUserName']
    resp['CreateTime'] = int(time.time())
    resp['MsgType'] = 'news'
    resp['ArticleCount'] = 1
    art = {}
    item = {}
    item['Title'] = '深圳公交'
    item['Description'] = '公交车路线'
    item['PicUrl'] = ''
    latlng = req['Location_X'] + ',' +req['Location_Y']
    item['Url'] = 'http://1.zylweixin.sinaapp.com/bus?latlng=%s' % urllib.quote(latlng)
    art['item'] = [item]
    resp['Articles'] = [art]
    # Serialize once and reuse the text for both the log line and the return
    # value; the original converted the same dict twice.
    xml_text = Dict2XMLTagText().toxml(resp)
    log.info(xml_text)
    return xml_text
Ejemplo n.º 17
0
def parse(dirname, buggy_dirname, assertion):
    """
    Build a unified diff that inserts a crashing check into a source file.

    ``assertion`` must look like
    ``<file>, before|after line <N> (<expr>), assert(<cond>);``.  The line
    closest to ``<expr>`` is searched in a small window around line ``<N>``
    of ``<dirname>/<file>`` and the statement
    ``if (!<cond>) {*((int*)0) = 0;} // my_assert`` is inserted before or
    after it.

    Returns the patch text.  Both diff headers name the file under
    ``dirname``.

    Raises:
        ValueError: if ``assertion`` does not have the expected form, or if no
            line in the window resembles ``<expr>``.
    """
    dirname = os.path.abspath(dirname)
    m = re.match(
        r'([^,]+),\s*(before|after)\s*line\s*([0-9]+)\s*\((.*)\),\s*(assert\(.*\);)',
        assertion)
    if m is None:
        # The original failed here with AttributeError on None.
        raise ValueError(f'cannot parse assertion description: {assertion!r}')
    file_path = os.path.join(dirname, m.group(1))
    # NOTE(review): buggy_dirname is not used; the diff headers point into
    # dirname -- confirm this is intended.
    buggy_file_path = os.path.join(dirname, m.group(1))
    before_after = m.group(2)
    line_no = int(m.group(3))
    expr = m.group(4).strip()
    assert_stmt = m.group(5)
    assert_args = re.match(r'assert\((.*)\);', assert_stmt).group(1)
    my_assert_stmt = f'if (!{assert_args}) {{*((int*)0) = 0;}} // my_assert'
    my_assert_stmt += '\n'

    log.info(f'{before_after} {file_path}:{line_no} "{my_assert_stmt}"')

    with open(file_path, 'r') as src:
        fromlines = src.readlines()

    # Window of up to four lines starting one line above line_no.  The start
    # is clamped so that line 1 does not turn into a negative slice index.
    window_start = max(line_no - 2, 0)
    matchto = [l.strip() for l in fromlines[window_start:line_no + 2]]
    matches = difflib.get_close_matches(expr, matchto)
    log.debug(f'close matching "{expr}"')
    if not matches:
        # Explicit raise: an assert would be stripped under -O.
        raise ValueError(
            f'no line close to "{expr}" near {file_path}:{line_no}')
    new_line_no = window_start + matchto.index(matches[0]) + 1
    if new_line_no != line_no:
        log.warning(f'switched line number to {new_line_no}')
        line_no = new_line_no
    log.debug(f'close matched {file_path}:{line_no} "{fromlines[line_no-1]}"')

    if before_after == 'before':
        tolines = fromlines[:line_no - 1] + [my_assert_stmt
                                             ] + fromlines[line_no - 1:]
    elif before_after == 'after':
        tolines = fromlines[:line_no] + [my_assert_stmt] + fromlines[line_no:]
    else:
        log.critical(f'before_after is not valid: {before_after}')
        return

    unidiff = difflib.unified_diff(fromlines,
                                   tolines,
                                   fromfile=buggy_file_path,
                                   tofile=buggy_file_path)
    patch = ''.join(unidiff)
    return patch
Ejemplo n.º 18
0
    def do_wizard(_, args, install_sh):
        """
        Get Pin installation from root.
        If Pin is not at the expected location, do the interactive wizard with install_sh.

        The wizard asks for confirmation, runs ``bash <install_sh> <root name>``
        in the parent directory of ``args.pin_root`` and logs its output.

        Returns the ``Pin`` object built from ``args`` (an error is logged if
        it is still not valid).

        Exits with status 1 when ``install_sh`` is missing, when the user
        declines, or when the installation script fails.
        """
        root = args.pin_root
        pin = Pin(args)
        if not pin.is_valid():
            log.warning(f'{root} is not a valid Pin installation.')
            if not install_sh.is_file():
                log.error(f'Could not execute {install_sh}.')
                raise SystemExit(1)

            log.warning(
                f'See {install_sh} for the recommended method for installing Pin.'
            )
            yn = input(
                f'Should I install it at {root}? [type y to install, anything else to quit]: '
            )
            if yn != 'y':
                raise SystemExit(1)

            # Build the argument vector directly so a script path containing
            # spaces is not split apart.
            cmd_args = ['bash', str(install_sh.absolute()), root.name]
            cmd = ' '.join(cmd_args)
            log.debug(
                f'Running Bash script install.sh with "{cmd}" in directory "{root}"'
            )
            proc = subprocess.Popen(cmd_args,
                                    cwd=root.parent,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            script_output, _stderr = proc.communicate()
            for l in script_output.decode().splitlines():
                log.info(f'**[{install_sh.name}]** {l}')
            if proc.returncode != 0:
                log.error(f'Could not execute {install_sh}.')
                raise SystemExit(1)
            log.info(f'Ran {install_sh} successfully.')

        pin = Pin(args)
        if not pin.is_valid():
            log.error(f'Something is wrong with the Pin environment at {root}')

        return pin
Ejemplo n.º 19
0
def stmtgen(parameters):
    """
    Get declaration and initializer statements for the given parameters

    Each item produced by ``stmts_for_param`` is a pair
    ``(declarations, initializers)`` of statement lists; all of them are
    flattened, in parameter order, into the two returned lists.

    Returns a tuple ``(decls, inits)``.
    """
    decls = []
    inits = []

    for index, parm in enumerate(parameters):
        stmts = list(stmts_for_param(parm.type, parm.displayname))
        # zip(*[]) yields nothing, so unpacking it would raise for a parameter
        # that produces no statements.
        parm_decls, parm_inits = zip(*stmts) if stmts else ((), ())
        log.info(
            f'parameter {pp(parm)}({index}) produces {len(parm_decls)} local variable declarations and {len(parm_inits)} initializer statements'
        )
        # Distinct names: the original reused ``i`` and shadowed the index.
        for var_decl, var_init in stmts:
            log.debug(f'local variable {var_decl} has initializer(s) {var_init}')
        decls.extend(stmt for group in parm_decls for stmt in group)
        inits.extend(stmt for group in parm_inits for stmt in group)

    return decls, inits
def send_to_contact_list(fake_or_real, contact_list, subject, email_template, q, years, contact_note_append=""):
    """
    Send one email per (journal, month) group of ``contact_list``.

    For every group the first record is used to build the survey URL and the
    email text from the ``email_template`` file, and the ``single_email``
    values of the group form the BCC list.  With ``fake_or_real == "REAL"``
    the mail goes to the BCC list, otherwise to a single fixed address.  After
    a successful send the group is recorded with ``update_sent_file``.

    Note that the first record of each group is modified in place (its ``q``
    and ``url`` keys are set).
    """
    ### DOESN'T HANDLE FILTERING BY YEARS YET!!!
    log.debug("SENDING EMAIL TO GROUPS")
    for journal in {d["journal"] for d in contact_list}:
        log.debug("JOURNAL: " + journal)
        journals_records = contact_corresponding.get_filtered_dict(contact_list, lambda k, v: k == "journal" and v == journal)
        # Only the months present for this journal: iterating the months of
        # the whole contact list produced empty groups and an IndexError on
        # month_records[0].
        for month in {d["data_month"] for d in journals_records}:
            log.debug("MONTH: " + month)
            month_records = contact_corresponding.get_filtered_dict(journals_records, lambda k, v: k == "data_month" and v == month)
            log.debug("n = " + str(len(month_records)))
            if not month_records:
                continue
            represetative_sample = month_records[0]
            represetative_sample["q"] = q
            (survey_url, shortened_url) = get_survey_url(represetative_sample)
            represetative_sample["url"] = shortened_url
            with open(email_template, "r") as template_fp:
                email_html_body_template = template_fp.read()
            email_body = contact_corresponding.get_email_text(email_html_body_template, represetative_sample)
            log.debug(email_body[0:200])
            bcc_list = [d["single_email"] for d in month_records]
            log.debug("Length of bcc list:" + str(len(bcc_list)))
            log.debug("BCC list:")
            log.debug(bcc_list)
            if fake_or_real == "REAL":
                success = send_it_already(subject, email_body, bcc_list)
                if success:
                    log.info("******* SENT FOR REAL ***********")
                else:
                    log.info("******* FAILED TO SEND ***********")
            else:
                success = send_it_already(subject, email_body, ["*****@*****.**"])
                log.info("--- just sent it to myself--------")
            if success:
                # NOTE(review): sent_filename is not a parameter of this
                # function; presumably a module-level global -- confirm.
                update_sent_file(sent_filename, bcc_list, "\t".join([" ".join(years), month, journal, contact_note_append]))
Ejemplo n.º 21
0
def _proc_text(msg, resp):
    """
    Dispatch a text message and store the answer in ``resp['Content']``.

    ``help``/``me``/``bug...`` messages and malformed weather or music
    queries are answered by raising ``MsgException`` with the reply text.
    Weather queries go to ``weather.weather``, music queries to
    ``proc_music`` and everything else to ``xiaodou.chat``.
    """
    if msg in ('help' , 'Help'):
        raise MsgException(HELP)
    if msg in ('me' , 'Me'):
        raise MsgException(ABOUTME)
    if msg.startswith('bug'):
        log.info('BUG|%s' , msg)
        raise MsgException('感谢你的留意 ^_^ ')

    if msg.startswith('天气'):
        # Keyword, whitespace, then the city name.
        parts = msg.split(None, 1)
        if len(parts) != 2:
            raise MsgException('查询天气格式: 天气 + 空格 + 城市名称\n如:天气 饶平')
        resp['Content'] = weather.weather(parts[1].strip())
        return

    if msg.startswith('音乐'):
        parts = msg.split(None, 1)
        if len(parts) != 2:
            raise MsgException(MUSIC_MSG)
        proc_music(parts[1].strip(), resp)
        return

    resp['Content'] = xiaodou.chat(msg)
Ejemplo n.º 22
0
def output(args, input_text, test_harness):
    """
    Output test_harness to file or stdout, depending on args

    The emitted text is ``input_text`` followed by ``test_harness``.  Unless
    ``args.no_format`` is set, the text is formatted with ``clang-format``
    (Google style) when that tool is on the PATH; when it is not, a warning
    is logged and the unformatted text is emitted.
    """
    import tempfile

    outfile = args.output[0] if args.output else None

    raw_text = f'''
{input_text}
// test harness
{test_harness}
'''

    if outfile:
        log.info(f'writing to output file {outfile}')
        with open(outfile, 'w') as f:
            f.write(raw_text)
        if not args.no_format:
            if shutil.which('clang-format'):
                subprocess.call(
                    ['clang-format', outfile, '-i', '-style=Google'])
            else:
                log.warning('clang-format not found')
        return

    log.info('generated test harness:')
    if args.no_format:
        print(raw_text)
        return
    if not shutil.which('clang-format'):
        log.warning('clang-format not found')
        # Still emit the harness; the original printed nothing in this case.
        print(raw_text)
        return

    # A uniquely named temporary file replaces the fixed /tmp path, which
    # could collide between concurrent runs and does not exist everywhere.
    tmp = tempfile.NamedTemporaryFile('w', suffix='.c', delete=False)
    tmp_filename = tmp.name
    try:
        with tmp:
            log.info(f'writing to temporary file {tmp_filename}')
            tmp.write(raw_text)
        subprocess.call(
            ['clang-format', tmp_filename, '-i', '-style=Google'])
        with open(tmp_filename, 'r') as f:
            formatted_text = f.read()
    finally:
        os.remove(tmp_filename)
    print(formatted_text)
Ejemplo n.º 23
0
 def rm(path, filepath):
     """
     Delete the entries of ``filepath`` whose text after the last dot is ``log`` or ``pyc``.

     Each matching name is logged and then removed from the directory ``path``.
     """
     removable = ("log", "pyc")
     for filename in filepath:
         suffix = filename.rsplit('.', 1)[-1]
         if suffix not in removable:
             continue
         log.info("clear " + filename)
         os.remove(os.path.join(path, filename))
def send_email(html_body, subject, to_addresses, cc_addresses, bcc_addresses, from_address):
    """
    Send ``html_body`` as a multipart (plain text + HTML) email.

    The plain-text part is derived from ``html_body`` with ``html2text``.
    The message is delivered to the To, Cc and Bcc addresses together; Bcc
    addresses do not appear in the headers.

    Returns True when the message was handed to the SMTP server, False when
    connecting or sending failed (the failure is logged, not raised).
    """
    # Based on code from http://docs.python.org/library/email-examples.html
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    # Create message container - the correct MIME type is multipart/alternative.
    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['To'] = ', '.join(to_addresses)
    msg['Cc'] = ', '.join(cc_addresses)
    msg['From'] = from_address

    # Create the body of the message (a plain-text and an HTML version).
    html = """\
    <html>
      <head></head>
      <body>
        <p>""" + html_body + """\
        </p>
      </body>
    </html>
    """
    text = html2text.html2text(html_body)

    # Record the MIME types of both parts - text/plain and text/html.
    part1 = MIMEText(text, 'plain')
    part2 = MIMEText(html, 'html')

    # Attach parts into message container.
    # According to RFC 2046, the last part of a multipart message, in this case
    # the HTML message, is best and preferred.
    msg.attach(part1)
    msg.attach(part2)

    # Gmail authentication was tried here but does not allow a customized
    # From address:
    # http://kutuma.blogspot.com/2007/08/sending-emails-via-gmail-with-python.html

    try:
        mailServer = smtplib.SMTP("shawmail.vc.shawcable.net")
        log.info("set smtp server")
    except Exception:
        log.info("failed to set smtp server")
        # Without a connection there is nothing to send or to close; the
        # original went on and crashed on mailServer.quit().
        return(False)

    # sendmail function takes 3 arguments: sender's address, recipient's address
    # and message to send - here it is sent as one string.
    success = False
    try:
        mailServer.sendmail(from_address, to_addresses + cc_addresses + bcc_addresses, msg.as_string())
        log.info("wrote an email:  \nFROM {0}, \nTO {1}, \nCC {2}, \nBCC {3} \n{4}".format(from_address, ",".join(to_addresses),",".join(cc_addresses), ",".join(bcc_addresses), msg.as_string()[0:200]))
        success = True
    except Exception:
        log.info("EMAIL NOT SENT OR RECEIVED PROPERLY:  \nFROM {0}, \nTO {1}, \nCC {2}, \nBCC {3} \n{4}".format(from_address, ",".join(to_addresses), ",".join(cc_addresses), ",".join(bcc_addresses), msg.as_string()[0:200]))
    finally:
        # Closing is best effort: a failure here must not hide the result.
        try:
            mailServer.quit()
        except Exception:
            log.info("failed to close smtp connection cleanly")

    return(success)
Ejemplo n.º 25
0
from gensim.models import Word2Vec
from mylog import log
from corpus import Corpus

import config

# 将关键词看做一个句子,使用CBOW法训练Word2Vec,然后将每篇文章的关键词的词向量加起来,作为该文章的特征向量
if __name__ == "__main__":
    # init database
    log.info("Initing corpus...")
    corpus = Corpus()
    log.info("Initing finished.")

    # configure modle
    model = Word2Vec(size=config.size,
                     window=config.window,
                     min_count=config.min_count,
                     workers=config.workers,
                     sg=config.sg,
                     max_vocab_size=config.max_vocab_size)

    # start training
    count = 0
    big_count = 0
    log.info("Start training...")
    if config.max_lines_corpus > 0:
        log.info("Only " + str(config.max_lines_corpus) +
                 " lines of corpus can be used.")
    else:
        log.info("No line limit for corpus.")
    for sentences in corpus.fetchSentences(config.per_lines_corpus):
def do_all_filtering(data_file, sent_filename, exclude_filename, months, years):
    """
    Reduce the records of ``data_file`` to the email addresses worth contacting.

    Unique records of type ``Article`` in the given months and years are
    expanded to one email per row.  Rows are then dropped, in this order,
    when the email is not a first occurrence, was already sent to, is on the
    unsubscribe list, or is on the exclude list.

    Returns a tuple ``(keepers, all_dupes)``: the surviving rows and all rows
    dropped by the four elimination steps.
    """
    log.info("FILTERING with months=" + " ".join(months) + " and years=" + " ".join(years))
    log.info("for data file " + data_file)

    records = get_isi_all_fields(data_file)
    log.info("STARTING with n=" + str(len(records)))

    # Narrow down to articles of the requested months and years.
    selected = get_unique_items(records)
    selected = get_filtered_dict(selected, lambda k, v: k == "type" and v == "Article")
    selected = get_filtered_dict(selected, lambda k, v: k == "data_month" and v in months)
    selected = get_filtered_dict(selected, lambda k, v: k == "year" and v in years)
    rows = get_one_email_per_row(selected)
    log.info("AFTER FILTERING for months and years, ONE EMAIL PER ROW, n=" + str(len(rows)))

    # Each step returns the rows kept and the rows dropped.
    kept, repeated = email_first_occurrence(rows, True)
    log.info("ELIMINATED because not first occurance, n=" + str(len(repeated)))
    log.info(list_of_emails(repeated))

    kept, already_sent = email_not_in_already_sent(kept, sent_filename)
    log.info("ELIMINATED because already sent, n=" + str(len(already_sent)))
    log.info(list_of_emails(already_sent))

    kept, unsubscribed = filter_unsubscribe_list(kept)
    log.info("ELIMINATED because unsubscribe, n=" + str(len(unsubscribed)))
    log.info(list_of_emails(unsubscribed))

    kept, excluded = filter_exclude_list(kept, exclude_filename)
    log.info("ELIMINATED because on exclude list, n=" + str(len(excluded)))
    log.info(list_of_emails(excluded))

    dropped = repeated + already_sent + unsubscribed + excluded
    log.info("KEEPING these, n=" + str(len(kept)))
    log.debug(list_of_emails(kept))
    return (kept, dropped)

   
    
        
Ejemplo n.º 27
0
def proc_text(req):
    """
    Answer a text message and return the reply serialized by ``Dict2XMLTagText``.

    The UTF-8 encoded ``Content`` of the request is handed to ``_proc_text``.
    A ``MsgException`` raised there becomes the reply text; any other
    exception is logged and the reply is returned without content.
    """
    resp = {}
    resp['ToUserName'] = req['FromUserName']
    resp['FromUserName'] = req['ToUserName']
    resp['CreateTime'] = int(time.time())
    resp['MsgType'] = 'text'  # default to a text reply
    # Bound up front so the log line below cannot hit an unbound name when
    # reading or encoding the content fails.
    msg = ''
    try:
        msg = req['Content'].encode('utf-8')
        _proc_text(msg, resp)
    except MsgException as e:
        resp['MsgType'] = 'text'
        resp['Content'] = str(e)
    except Exception:
        log.exception('exception')
    log.info('textmsg|%s|%s|%s|%s' , req['FromUserName'] ,req['ToUserName'],
           msg, resp.get('Content' , '')  )
    return Dict2XMLTagText().toxml(resp)


# 5. Event push: sample XML of an event message
EventMsgxml = '''
<xml><ToUserName><![CDATA[toUser]]></ToUserName>
<FromUserName><![CDATA[FromUser]]></FromUserName>
<CreateTime>123456789</CreateTime>
<MsgType><![CDATA[event]]></MsgType>
<Event><![CDATA[EVENT]]></Event>
<EventKey><![CDATA[EVENTKEY]]></EventKey>
</xml>'''
def proc_event(req):
    event = req['Event']
    if event == 'subscribe':
Ejemplo n.º 28
0
    def updatehtml(self, argv=utils.Command.HELP):
        '''
        update the html.json

        ``argv`` is the directory holding the html files.  ``html.json`` in
        that directory is loaded (or started from the
        ``utils.DataTpl.HTML_INFO_JSON`` template), its ``info`` section is
        refreshed, ``update_app`` is rebuilt with the files that are new or
        whose md5 changed, ``all_app`` is synchronized with the files on disk,
        and the result is stored back.

        Returns False (after showing the help) when called without a path or
        when the path does not exist; returns None otherwise.
        '''
        import copy

        log.info("updatehtml")
        if argv == utils.Command.HELP:
            self.help()
            return False

        html_file_path = argv  # directory of the html files
        json_file_path = os.path.join(html_file_path,
                                      "html.json")  # full path of html.json

        if not os.path.exists(html_file_path):
            log.error("File '{0}' not exists !".format(html_file_path))
            self.help()
            return False

        # Names of all html files in the directory.
        html_file_list = FileOperation.get_html_files(html_file_path)

        # Load html.json, or start from the template when it does not exist.
        if not os.path.exists(json_file_path):
            # Work on a copy so the shared template is not modified in place.
            json_data = copy.deepcopy(utils.DataTpl.HTML_INFO_JSON)
        else:
            json_data = utils.FileOperation.load_json(json_file_path)

        # Refresh the info section of html.json.
        json_data["info"] = {
            "time":
            utils.get_current_date() + " " + utils.get_current_time(),
            "count": len(html_file_list),
            "path": html_file_path
        }
        json_data['update_app'] = {}  # reset the list of updated files

        all_app = json_data["all_app"]
        for html_file in html_file_list:
            # Strip only the last extension, so that "a.b.html" is stored as
            # "a.b" and is found again by the removal pass below.
            html_file_name = html_file.rsplit(".", 1)[0]
            # The md5 of the file tells whether it changed.
            html_file_md5 = FileOperation.get_mad5(
                os.path.join(html_file_path, html_file))
            if html_file_name in all_app:
                if all_app[html_file_name]["md5"] == html_file_md5:
                    continue  # already recorded and unchanged
                all_app[html_file_name]["md5"] = html_file_md5
            else:
                all_app[html_file_name] = {
                    "name": html_file,
                    "md5": html_file_md5
                }
            json_data["update_app"][html_file_name] = {
                "name": html_file,
                "md5": html_file_md5
            }

        # Drop the entries whose html file no longer exists.  Iterate over a
        # snapshot of the keys: removing entries while iterating the live key
        # view raises RuntimeError on Python 3.
        existing_files = set(html_file_list)
        for app in list(all_app.keys()):
            if app + ".html" not in existing_files:
                all_app.pop(app)

        # Save.
        utils.FileOperation.store_json(json_file_path, json_data)
        if json_data['update_app'].keys():
            log.info("更新的html文件有: {0}".format(
                str(json_data['update_app'].keys())))
Ejemplo n.º 29
0
 def help(self, argv=utils.Command.HELP):
     '''
     Log the usage text ``utils.Command.USAGE_INFO``.

     ``argv`` is accepted but not used.
     '''
     usage_text = utils.Command.USAGE_INFO
     log.info(usage_text)
Ejemplo n.º 30
0
def savePed(ped_list, time):
    """
    Log the items of ``ped_list`` as one message.

    The message starts with a ``Time <time>----`` header line followed by the
    string form of every item, one per line.
    """
    header = "Time " + str(time) + "----\n"
    body = "".join(str(ped) + "\n" for ped in ped_list)
    log.info(header + body)
Ejemplo n.º 31
0
        return self.sentence2spec_vec(sentence)

    def genSpecVec(self, origin_line):
        '''Compute the spec_vec of one origin line.
        @param
            origin_line: an origin line fetched from database, like ("asngy033", "zzy", "bhuv", ...).
        @return
            The spec_vec of the line, in a format like [0.3, 0.1, -3.2, ...].
            The dim is determined by Word2Vec's model.
        '''
        # The line is first passed through Corpus.addFieldForSingle, then
        # handed to the internal generator.
        return self.int_genSpecVec(Corpus.addFieldForSingle(origin_line))


# Script entry: load the model, open the corpus and dump the spec_vec of the
# first fetched origin line as JSON to config.output_path.
if __name__ == "__main__":
    log.info("Loading model...")
    spec_vec_gen = SpecVecGen(config.model_path)
    log.info("Load finished.")

    log.info("Initing corpus...")
    corpus = Corpus()
    log.info("Init finished.")

    import json
    with open(config.output_path, "w") as fd:
        # NOTE(review): count is never read in the visible code.
        count = 0
        for origin_line in corpus.fetchOriginLines(1):
            # Only the first element of each fetched item is used.
            spec_vec = spec_vec_gen.genSpecVec(origin_line[0])
            json.dump(spec_vec, fd)
            # Stop after the first fetched item.
            break