def parse(fcode):
    """Fetch the CLC category page for ``fcode`` and store its rows.

    Requests ``http://www.clcindex.com/category/<fcode>/`` through the proxy
    returned by ``random_ip()``. On HTTP 200 every ``item-row`` table row is
    written to the ``clc`` table (code taken from the 2nd cell, name from the
    3rd) and the row for ``fcode`` is then flagged with ``stat = 1``. Any
    other status prints ``"ip err"``. Every exception is printed and
    swallowed, so a failing page never propagates to the caller.

    :param fcode: category code appended to the URL and matched against
        ``clc.fcode`` when the page is marked as done.
    """
    url = "http://www.clcindex.com/category/%s/" % fcode
    ip = {"http": random_ip()}
    try:
        res = requests.get(url, proxies=ip, timeout=3)
        if res.status_code != 200:
            print("ip err")
            return
        print(url)
        print(ip)
        html = Selector(res.text, 'html')
        clc_nos = html.xpath(
            "//tr[@name='item-row']//td[2]/text()").extract()
        # Evaluate the name query once instead of once per row.
        clc_names = html.xpath(
            "//tr[@name='item-row']//td[3]//text()").extract()
        curser = conn.cursor()
        for i, clc_no in enumerate(clc_nos):
            clc_no = clc_no.replace("\t", "").replace("\n", "")
            # Indexing (not zip) keeps the original behaviour of aborting
            # with IndexError when fewer names than codes were extracted.
            clc_name = clc_names[i].replace("\t", "").replace("\n", "")
            # Bound parameters: scraped text containing a quote can no
            # longer break (or inject into) the statement.
            # NOTE(review): "insert or replace" is SQLite syntax, so the
            # qmark paramstyle of sqlite3 is assumed - confirm conn's driver.
            curser.execute(
                "insert or replace into clc(fcode,info) values (?,?)",
                (clc_no, clc_name))
            conn.commit()
            utils.printf("%s 插入成功" % clc_no)
        curser.execute("update clc set stat = 1 where fcode = ?",
                       (str(fcode),))
        conn.commit()
    except Exception as e:
        print(e)
Ejemplo n.º 2
0
 def startdown_list(self, message):
     """Queue a ``down_list`` job for every issue page that needs fetching.

     Selects issues from the ``apsjournal`` database whose ``stat`` is 0 or
     whose ``year`` is the current or previous calendar year, stores their
     number in ``self.totalcount`` and sends one
     ``('down_list', (url, fname))`` work item per row. When nothing is
     selected it either logs that no update is needed (``list_path`` is
     empty) or sends ``parse_list``.

     :param message: not used by this method.
     """
     utils.printf('%s:开始下载列表页...' % self.provider)
     if not self.list_path:
         self.initpath()
     self.refreshproxypool()
     self.count = 0
     db = utils.init_db('mysql', 'apsjournal')
     cursor = db.cursor()
     this_year = time.strftime('%Y')
     last_year = int(this_year) - 1
     query = ("select url,stat from issue where stat=0 or year=%s or year=%s"
              % (this_year, last_year))
     cursor.execute(query)
     issues = cursor.fetchall()
     self.totalcount = len(issues)
     if self.totalcount == 0:
         if os.listdir(self.list_path):
             self.sendwork('parse_list')
         else:
             utils.logerror('%s:没有新的issue不需要更新' % self.provider)
     for url, _stat in issues:
         segments = url.split('/')
         # Directory is named after the 4th URL segment from the end.
         fdir = self.list_path + '/' + segments[-4]
         if not os.path.exists(fdir):
             os.makedirs(fdir)
         fname = fdir + '/' + segments[-2] + '_' + segments[-1] + '.html'
         self.sendwork('down_list', (url, fname))
Ejemplo n.º 3
0
 def down_detail(self):
     """Download the detail page of every book whose ``stat`` is still 0.

     Repeatedly reads up to 10000 pending ``bookid`` values from the
     ``book`` table of ``cqjtu_kingbook`` and stops once a query returns no
     rows. Each page is saved as ``<detail_path>/<first 3 chars>/<id>.html``
     and the book is then flagged ``stat=1``. Books whose file already
     exists are only flagged; books whose download returns a falsy response
     are left pending and picked up again by the next query.
     """
     utils.printf("下载详情页开始...")
     super().down_detail()
     conn = utils.init_db('mysql', 'cqjtu_kingbook')
     cur = conn.cursor()

     def mark_done(book):
         # Flag one book as downloaded and persist immediately.
         cur.execute('update book set stat=1 where bookid="{}"'.format(book))
         conn.commit()

     while True:
         cur.execute(
             'select bookid,stat from book where stat=0 limit 10000')
         pending = cur.fetchall()
         conn.commit()
         if not pending:
             break
         for bookid, _stat in pending:
             print(bookid)
             url = 'http://123.56.143.23/kingbookwaiwen/book/info.aspx?id={}'.format(
                 bookid)
             dirname = '%s/%s' % (self.detail_path, bookid[:3])
             if not os.path.exists(dirname):
                 os.makedirs(dirname)
             filename = '%s/%s.html' % (dirname, bookid)
             if os.path.exists(filename):
                 mark_done(bookid)
                 continue
             resp = utils.get_html(url, proxies=self.proxy)
             if not resp:
                 continue
             page = resp.content.decode()
             with open(filename, mode='w', encoding='utf8') as out:
                 out.write(page)
             mark_done(bookid)
             utils.printf("下载", bookid, "成功...")
Ejemplo n.º 4
0
    def _summary_tvalues(self):
        '''
        Print a summary table of the collected t-values.

        For every key of ``self.tvalues`` the table holds the minimum,
        median, mean, maximum and standard deviation of the collected
        values, the matching entry of ``self.model.tvalues`` (T-TEST) and
        the share of collected values that are ``>=`` (P(+)) or ``<=``
        (P(-)) that entry. All figures are rounded to 4 decimals.
        :return: None
        '''
        print('')
        utils.printf('=== Summary T-Values ===')

        columns = [
            'MIN', 'MEDIAN', 'MEAN', 'MAX', 'STD. DEV.', 'T-TEST', 'P(+)',
            'P(-)'
        ]
        summary = pd.DataFrame(index=self.tvalues.keys(), columns=columns)
        for key, samples in self.tvalues.items():
            observed = self.model.tvalues[key]
            share_ge = np.mean([int(s >= observed) for s in samples])
            share_le = np.mean([int(s <= observed) for s in samples])
            # Same order as `columns`, so the two can be zipped together.
            figures = (min(samples), np.median(samples), np.mean(samples),
                       max(samples), np.std(samples), observed, share_ge,
                       share_le)
            for column, figure in zip(columns, figures):
                summary.loc[key, column] = round(figure, 4)

        print(summary)
    def parse_index(self, message):
        """Copy ISSN / CN numbers from the saved index JSON files into MySQL.

        Every file yielded by ``utils.file_list(self.index_path)`` is read
        as JSON; ``resultValue.issnNm`` and ``resultValue.cnNm`` are written
        to the ``journal`` row whose ``journal_id`` equals the file name
        without its ``.json`` suffix. On success the connection is closed
        and ``get_issuelist`` is reported as finished. Any exception is
        printed and logged, never raised.

        :param message: not used by this method.
        """
        try:
            utils.printf('%s:解析索引页开始...' % self.provider)
            conn = utils.init_db('mysql', 'hepengineeringjournal', 4)
            self.sqlList.clear()
            cur = conn.cursor()
            for filename, fullname in utils.file_list(self.index_path):
                with open(fullname, encoding='utf8') as f:
                    text = f.read()
                # No `encoding` keyword: it was ignored since Python 3.1 and
                # removed in 3.9, where passing it raises TypeError.
                dic = json.loads(text)
                gch = filename.replace('.json', '')
                dicitem = dic['resultValue']
                issn = dicitem['issnNm']
                cnno = dicitem['cnNm']
                # Bound parameters instead of interpolating downloaded text
                # into the statement (a quote in the JSON broke the SQL).
                # A JSON null is now stored as NULL rather than "None".
                cur.execute(
                    'update journal set issn=%s,cnno=%s where journal_id=%s',
                    (issn, cnno, gch))
                conn.commit()

            cur.close()
            conn.close()
            utils.printf('%s:解析索引页完成...' % self.provider)
            self.senddistributefinish('get_issuelist')
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit still
            # propagate instead of being logged and swallowed.
            exMsg = '* ' + traceback.format_exc()
            print(exMsg)
            utils.logerror(exMsg)
 def startdown_detail(self, message):
     """Send pending articles to the ``down_detail`` workers in batches.

     Reads every ``(article_id, journal_id)`` with ``stat=0`` and
     ``failcount<3`` from ``hepengineeringjournal``, makes sure the
     per-journal directory under ``detail_path`` exists and sends the pairs
     as ``down_detail`` work items of 30 entries (the last one may be
     shorter). When nothing is pending, ``parse_detail`` is sent instead.

     :param message: not used by this method.
     """
     if not self.detail_path:
         self.initpath()
     self.sqlList.clear()
     self.refreshproxypool()
     self.count = 0
     db = utils.init_db('mysql', 'hepengineeringjournal', 4)
     cursor = db.cursor()
     cursor.execute(
         'select article_id,journal_id from article where stat=0 and failcount<3'
     )
     pending = cursor.fetchall()
     self.totalcount = len(pending)
     if not self.totalcount:
         utils.printf('%s:下载详情页完成' % self.provider)
         self.sendwork('parse_detail')
         return
     batch = []
     for article_id, journal_id in pending:
         journal_dir = '%s/%s' % (self.detail_path, journal_id)
         if not os.path.exists(journal_dir):
             os.makedirs(journal_dir)
         batch.append((article_id, journal_id))
         if len(batch) == 30:
             self.sendwork('down_detail', batch)
             # Start a fresh list so the one just sent is never mutated.
             batch = []
     if batch:
         self.sendwork('down_detail', batch)
Ejemplo n.º 7
0
 def run(self):
     """Feed ``url_queue`` from the database and write back queued results.

     Loops once per second. Whenever ``url_queue`` is empty, up to 1000
     issues with ``stat=0`` are loaded from ``aipjournal`` and queued; the
     loop ends when that query returns nothing and ``sql_queue`` is empty
     too. Entries of ``sql_queue`` (``(url, flag)`` pairs) are applied to
     the ``issue`` table once more than 100 are waiting or 60 seconds have
     passed since the previous flush.
     """
     conn = utils.init_db('mysql', 'aipjournal')
     cur = conn.cursor()
     select_sql = "select url,stat from issue where stat=0 limit 1000;"
     last_flush = time.time()
     updated = 0
     while True:
         if url_queue.empty():
             cur.execute(select_sql)
             issues = cur.fetchall()
             conn.commit()
             if issues:
                 for issue in issues:
                     url_queue.put(issue)
             elif sql_queue.empty():
                 break
         now = time.time()
         if (sql_queue.qsize() > 100) or (now - last_flush > 60):
             # Drain only what is queued right now; later arrivals wait
             # for the next flush.
             for _ in range(sql_queue.qsize()):
                 url, flag = sql_queue.get()
                 cur.execute(
                     "update issue set stat={} where url='{}'".format(
                         flag, url))
                 updated += 1
             conn.commit()
             utils.printf('succssed:%d' % (updated))
             last_flush = time.time()
         time.sleep(1)
Ejemplo n.º 8
0
def down_cover():
    """Download the cover image of every video whose ``cover_stat`` is 0.

    Covers are stored as ``<cover_path>/<YYYYMMDD>/<rawid>.jpg``. Videos are
    read in batches of up to 1000 until a query returns no rows. The ids of
    covers that already exist or were converted successfully are collected
    and handed to ``utils.parse_results_to_sql`` together with the
    ``cover_stat = 1`` update statement.
    """
    # Keyword arguments: PyMySQL 1.0 made connect() keyword-only, so the
    # former positional call raises TypeError on current releases.
    conn = pymysql.connect(host=DBHOST, user=DBUSER, password=DBPWD,
                           database=DB)
    now_time = time.strftime('%Y%m%d')
    dirpath = cover_path + '/' + now_time
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)
    sql_up = "update video set cover_stat = 1 where rawid = %s"
    sql = "select rawid,cover_url from video where cover_stat=0 limit 1000"
    result = []
    while True:
        cur = conn.cursor()
        cur.execute(sql)
        rows = cur.fetchall()
        if len(rows) == 0:
            break
        for rawid, cover_url in rows:
            path = dirpath + '/%s.jpg' % rawid
            res = utils.get_html(cover_url, proxies=proxy, timeout=50)
            if res:
                if os.path.exists(path):
                    result.append(rawid)
                    utils.printf("该", rawid, "存在...")
                elif utils.Img2Jpg(res.content, path):
                    result.append(rawid)
                    utils.printf("下载", rawid, "成功...")
                else:
                    print('%s -- down cover error' % rawid)
            # presumably flushes once `result` holds 100 entries and
            # returns truthy when it did - confirm in utils.
            if utils.parse_results_to_sql(conn, sql_up, result, 100):
                total = len(result)
                result.clear()
                print('更新 ', total, ' 个结果到数据库成功')
        utils.parse_results_to_sql(conn, sql_up, result)
        print('更新 ', len(result), ' 个结果到数据库成功')
        # Without this the ids just written stayed in `result` and were
        # re-submitted (and re-counted) with every following batch.
        result.clear()
Ejemplo n.º 9
0
    def down_index(self, message):
        """Download one journal/year index page unless it is already saved.

        The page ``http://<journal>.sciencemag.org/content/by/year/<year>``
        is stored as ``<index_path>/<journal>_<year>.html``. If the file
        exists, or after a successful download, ``process_index`` is
        reported as finished. A falsy response re-queues the same job.
        Any exception is printed and logged, never raised.

        :param message: sequence whose first item is the journal name and
            second item the year.
        """
        try:
            journalname, year = message[0], message[1]
            fname = ''.join(
                [self.index_path, '/', journalname, '_', str(year), '.html'])
            utils.printf('开始下载 %s' % fname)
            if os.path.exists(fname):
                self.senddistributefinish('process_index')
                return
            feature = 'issue-month-detail'
            url = 'http://{}.sciencemag.org/content/by/year/{}'.format(
                journalname, str(year))
            resp = self.gethtml(url, feature)
            if resp:
                page = resp.content.decode('utf8')
                with open(fname, mode='w', encoding='utf8') as out:
                    out.write(page)
                utils.printf('下载 %s 成功' % fname)
                self.senddistributefinish('process_index')
            else:
                # Download failed: put the same job back on the queue.
                self.sendwork('down_index', (journalname, year))
        except:
            exMsg = '* ' + traceback.format_exc()
            print(exMsg)
            utils.logerror(exMsg)
Ejemplo n.º 10
0
    def handleMenuItem_(self, ns_item):
        '''ObjC callback to handle and dispatch C{NSMenuItem}
           clicks and shortcuts.

           The item's I{action} name is looked up first on this
           I{NSDelegate}'s L{App} instance (called with the wrapped
           item) and then on the delegate itself (called with the raw
           C{ns_item}); the first callable that returns without an
           exception ends the dispatch.

           Unhandled clicks, shortcuts and dispatch errors are
           silently ignored, unless L{App} C{raiser} keyword
           argument was C{True}.
        '''
        item = ns2Item(ns_item)
        act = item._action
        for target, arg in ((self.app, item), (self, ns_item)):
            handler = getattr(target, act, None)
            if not (handler and callable(handler)):
                continue
            try:
                handler(arg)
            except Exception:
                # With raiser off the error is dropped and the next
                # target gets a chance to handle the item.
                if _Globals.raiser:
                    printf('%s(%r): %r method %s ...',
                           _handleMenuItem_name, arg, target, act)
                    raise
            else:
                break
        else:
            # Reached only when no handler completed successfully.
            if _Globals.raiser:
                raise RuntimeError('%s(%r): %s' % ('unhandled', item, act))
Ejemplo n.º 11
0
def parse_detal():
    """Parse the saved journal detail pages and update the ``journal`` table.

    For every file yielded by ``utils.file_list(detailpath)`` the journal id
    is the file name without ``.html``. The foreign title is read from the
    first ``<h4>``; sponsor, region, ISSN, CN number and publication cycle
    are taken from the ``perinfo`` text lines that start with the matching
    label. One UPDATE per file is executed and committed.
    """

    def quote(value):
        # Double single quotes so the value is safe inside '...' literals.
        return value.replace("'", "''")

    labels = ("主办单位:", "地区:", "国际刊号:", "国内刊号:", "出版周期:")
    for file, fullpath in utils.file_list(detailpath):
        j_id = file.replace(".html", '')
        with open(fullpath, encoding='utf8') as f:
            text = f.read()
        html = Selector(text, 'html')
        title = html.xpath("//h3/text()").extract_first("")
        title_en = html.xpath("//h4/text()").extract_first("")
        div = html.xpath("//div[@class='perinfo']/text()").extract()
        info = dict.fromkeys(labels, "")
        for item in div:
            for label in labels:
                if item.startswith(label):
                    info[label] = item.replace(label, "")
        # Previously only title_en was escaped; a quote in any other field
        # produced a broken statement.
        values = tuple(
            quote(v) for v in (title_en, info["主办单位:"], info["地区:"],
                               info["国际刊号:"], info["国内刊号:"],
                               info["出版周期:"], j_id))
        sql = "update journal set 期刊名称_外文 = '%s' , 主办单位 = '%s' , 地区 = '%s' , 国际刊号 = '%s' , 国内刊号 = '%s' , 出版周期 = '%s' where 期刊id = '%s'" % values
        curser = db.cursor()
        curser.execute(sql)
        # DB-API 2.0 defines commit() on the connection, not the cursor.
        db.commit()
        utils.printf("更新%s信息成功" % title)
Ejemplo n.º 12
0
 def startdown_list(self, message):
     """Queue a ``down_list`` job for every issue with ``stat=0``.

     Issues come from the ``science`` database; their number is stored in
     ``self.totalcount``. Each page is to be saved under a directory named
     after the part of the stored URL before its first ``.``, and the work
     item carries the URL prefixed with ``http://``. When nothing is
     pending, either an error is logged and sent via ``utils.msg2weixin``
     (directory empty) or ``parse_list`` is sent.

     :param message: not used by this method.
     """
     utils.printf('%s:开始下载列表页...' % self.provider)
     if not self.list_path:
         self.initpath()
     self.refreshproxypool()
     self.count = 0
     db = utils.init_db('mysql', 'science')
     cursor = db.cursor()
     cursor.execute('select url,stat from issue where stat=0')
     issues = cursor.fetchall()
     self.totalcount = len(issues)
     if self.totalcount == 0:
         # NOTE(review): this lists index_path although the pages are
         # written below list_path - confirm that is intended.
         if os.listdir(self.index_path):
             self.sendwork('parse_list')
         else:
             notice = '%s:没有新的issue不需要更新' % self.provider
             utils.logerror(notice)
             utils.msg2weixin(notice)
     for url, _stat in issues:
         fdir = self.list_path + '/' + url.split('.')[0]
         if not os.path.exists(fdir):
             os.makedirs(fdir)
         segments = url.split('/')
         fname = fdir + '/' + segments[-2] + '_' + segments[-1] + '.html'
         full_url = 'http://' + url
         self.sendwork('down_list', (full_url, fname))
def get_year2que():
    """Put one ``(year, url)`` message per year 1949..2020 on ``year_que``.

    The URL is the ``'Pharmacological Reviews'`` entry of ``dic_journal``
    followed by ``/<year>``; the year is passed as a string.
    """
    base_url = dic_journal['Pharmacological Reviews']
    for year in range(1949, 2021):
        year_text = str(year)
        year_que.put((year_text, base_url + '/%s' % year_text))
        utils.printf("%s年url添加成功!~" % year)
Ejemplo n.º 14
0
 def parse_detail(self, message):
     """Parse all saved detail pages into the ``modify_title_info_zt`` table.

     First caches ``journal_id -> (journal_name, issn, eissn, cnno)`` from
     the ``hepjournal`` MySQL database in ``self.dic``, then calls
     ``self.predb3()`` and feeds the result of ``parse_detail_one`` for
     every file under ``detail_path`` to ``utils.parse_results_to_sql`` on
     ``self.conn``. Finally closes ``self.conn`` and sends a completion
     notice through ``utils.msg2weixin``.

     :param message: not used by this method.
     """
     mysql_conn = utils.init_db('mysql', 'hepjournal', 4)
     mysql_cur = mysql_conn.cursor()
     mysql_cur.execute(
         'select journal_id,journal_name,issn,eissn,cnno from journal')
     journals = mysql_cur.fetchall()
     for journal_id, journal_name, issn, eissn, cnno in journals:
         self.dic[journal_id] = (journal_name, issn, eissn, cnno)
     mysql_cur.close()
     mysql_conn.close()
     self.predb3()
     self.sqlList.clear()
     stmt = """insert or ignore into modify_title_info_zt(lngid, rawid, creator, title, volume, issue, page, beginpage,
     endpage, publisher, subject, date,creator_institution, date_created, source, identifier_pissn, identifier_eissn,
     identifier_cnno, description, identifier_doi, language, country, provider, provider_url, provider_id, type, medium,
     batch, gch)values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"""
     inserted = 0
     report = '%s: 插入 %d 条数据到db3'
     for filename, fullname in utils.file_list(self.detail_path):
         record = self.parse_detail_one(filename, fullname)
         if record:
             self.sqlList.append(record)
         # presumably writes only once 50 records are buffered and
         # returns truthy when it did - confirm in utils.
         flushed = utils.parse_results_to_sql(self.conn, stmt, self.sqlList,
                                              50)
         if flushed:
             inserted += len(self.sqlList)
             utils.printf(report % (self.provider, inserted))
             self.sqlList.clear()
     # Write whatever is left over from the last partial batch.
     utils.parse_results_to_sql(self.conn, stmt, self.sqlList)
     inserted += len(self.sqlList)
     utils.printf(report % (self.provider, inserted))
     self.conn.close()
     self.conn = None
     utils.msg2weixin('%s: 解析完成,成品文件为%s' %
                      (self.provider, self.template_file))
Ejemplo n.º 15
0
 def down_cover(self, message):
     """Download one book cover and save it as ``<cover_path>/<bookid>.jpg``.

     If the file already exists, or after ``utils.Img2Jpg`` reports
     success, ``process_cover`` is reported as finished for the book. A
     failed request or a failed conversion re-queues the same message.

     :param message: sequence whose first item is the book id and second
         item the cover URL.
     """
     HEADER = {
         'User-Agent':
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
     }
     bookid, cover_url = message[0], message[1]
     filename = self.cover_path + '/' + bookid + '.jpg'
     if os.path.exists(filename):
         self.senddistributefinish('process_cover', bookid)
         return
     try:
         proxy = self.getproxy()
         resp = requests.get(cover_url,
                             headers=HEADER,
                             timeout=20,
                             proxies={'http': proxy, 'https': proxy})
     except:
         # Any failure while fetching: retry later.
         self.sendwork('down_cover', message)
         return
     if not utils.Img2Jpg(resp.content, filename):
         self.sendwork('down_cover', message)
         return
     utils.printf('下载图片%s成功' % filename)
     self.senddistributefinish('process_cover', bookid)
Ejemplo n.º 16
0
 def startdown_list(self, message):
     """Queue a ``down_list`` job for every issue with ``stat=0``.

     Issues come from the ``hepjournal`` database; their number is stored
     in ``self.totalcount``. Each page is to be saved as
     ``<list_path>/<journal_id>/<journal_id>_<segment -2>_<segment -1>``
     with a ``.shtml`` suffix turned into ``.html``. When nothing is
     pending, either an error is logged (``list_path`` empty) or
     ``parse_list`` is sent.

     :param message: not used by this method.
     """
     utils.printf('%s:开始下载列表页...' % self.provider)
     if not self.list_path:
         self.initpath()
     self.refreshproxypool()
     self.sqlList.clear()
     self.count = 0
     db = utils.init_db('mysql', 'hepjournal', 4)
     cursor = db.cursor()
     cursor.execute('select url,journal_id from issue where stat=0')
     issues = cursor.fetchall()
     self.totalcount = len(issues)
     if self.totalcount == 0:
         if os.listdir(self.list_path):
             self.sendwork('parse_list')
         else:
             utils.logerror('%s:没有新的issue不需要更新' % self.provider)
     for url, journal_id in issues:
         fdir = self.list_path + '/' + journal_id
         if not os.path.exists(fdir):
             os.makedirs(fdir)
         segments = url.split('/')
         page_name = segments[-1].replace('.shtml', '.html')
         fname = (fdir + '/' + journal_id + '_' + segments[-2] + '_' +
                  page_name)
         self.sendwork('down_list', (url, fname))
Ejemplo n.º 17
0
def down_detail():
    """Download the detail page of every book whose ``stat`` is still 0.

    Repeatedly reads up to 10000 pending ``bookid`` values from the ``book``
    table and stops once a query returns no rows. Each page is saved as
    ``<detail_path>/<YYYYMMDD>/<first 3 chars>/<id>.html`` (written as
    gb18030) and the book is then flagged ``stat=1``. Books whose file
    already exists are only flagged; books whose download returns a falsy
    response stay pending and are picked up again by the next query.
    """
    utils.printf("下载详情页开始...")
    now_time = datetime.datetime.now().strftime("%Y%m%d")
    # Keyword arguments: PyMySQL 1.0 made connect() keyword-only, so the
    # former positional call raises TypeError on current releases.
    conn = pymysql.connect(host=DBHOST, user=DBUSER, password=DBPWD,
                           database=DB)
    cur = conn.cursor()
    # The dated directory does not depend on the book, so build it once.
    dir_path = detail_path + '/' + now_time
    # Bound parameter instead of formatting the id into the statement.
    sql_done = 'update book set stat=1 where bookid=%s'
    while True:
        cur.execute('select bookid,stat from book where stat=0 limit 10000')
        rows = cur.fetchall()
        conn.commit()
        if not rows:
            break
        for bookid, _ in rows:
            print(bookid)
            url = 'http://10.5.23.18:8079/book/bookinfo.aspx?id={}'.format(bookid)
            dirname = '%s/%s' % (dir_path, bookid[:3])
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            filename = '%s/%s.html' % (dirname, bookid)
            if os.path.exists(filename):
                cur.execute(sql_done, (bookid,))
                conn.commit()
                continue
            resp = utils.get_html(url, proxies=proxy)
            if not resp:
                continue
            with open(filename, mode='w', encoding='gb18030') as f:
                f.write(resp.content.decode())
            cur.execute(sql_done, (bookid,))
            conn.commit()
            utils.printf("下载", bookid, "成功...")
Ejemplo n.º 18
0
 def parse_detail_meta(self, message):
     """Parse all saved detail pages into the ``base_obj_meta_a`` table.

     First caches ``gch -> (journal_name, journal_name_en, pissn, eissn)``
     from the ``aiaajournal`` MySQL database in ``self.dic``, then calls
     ``self.predb3`` with the meta template names and feeds the result of
     ``parse_detail_one(..., 'meta')`` for every file under ``detail_path``
     to ``utils.parse_results_to_sql`` on ``self.conn``. Finally closes
     ``self.conn`` and sends a completion notice via ``utils.msg2weixin``.

     :param message: not used by this method.
     """
     mysql_conn = utils.init_db('mysql', 'aiaajournal', 2)
     mysql_cur = mysql_conn.cursor()
     mysql_cur.execute(
         'select gch,journal_name,journal_name_en,pissn,eissn from journal')
     journals = mysql_cur.fetchall()
     for gch, journal_name, journal_name_en, pissn, eissn in journals:
         self.dic[gch] = (journal_name, journal_name_en, pissn, eissn)
     mysql_cur.close()
     mysql_conn.close()
     self.predb3('base_obj_meta_a_template_qk.db3',
                 'base_obj_meta_a_qk.aiaajournal')
     self.sqlList.clear()
     stmt = """insert into base_obj_meta_a (author,author_1st,organ,organ_1st,title,title_alt,keyword,pub_year,pub_date,
     vol,num,journal_raw_id,journal_name,journal_name_alt,page_info,begin_page,end_page,subject,is_oa,down_cnt,lngid,
     rawid,product,sub_db,
     provider,sub_db_id,source_type,provider_url,country,language,batch,down_date,publisher,issn,eissn,abstract,
     abstract_alt,doi,fund,ref_cnt,fulltext_type) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,
     ?,?,?,?,?,?,?,?,?,?,?)"""
     inserted = 0
     report = '%s: 插入 %d 条数据到db3'
     for filename, fullname in utils.file_list(self.detail_path):
         record = self.parse_detail_one(filename, fullname, 'meta')
         if record:
             self.sqlList.append(record)
         # presumably writes only once 50 records are buffered and
         # returns truthy when it did - confirm in utils.
         flushed = utils.parse_results_to_sql(self.conn, stmt, self.sqlList,
                                              50)
         if flushed:
             inserted += len(self.sqlList)
             utils.printf(report % (self.provider, inserted))
             self.sqlList.clear()
     # Write whatever is left over from the last partial batch.
     utils.parse_results_to_sql(self.conn, stmt, self.sqlList)
     inserted += len(self.sqlList)
     utils.printf(report % (self.provider, inserted))
     self.conn.close()
     self.conn = None
     utils.msg2weixin('%s: 解析完成,成品文件为%s' %
                      (self.provider, self.template_file))
Ejemplo n.º 19
0
 def parse_detail(self, message):
     """Parse all saved book pages into the ``modify_title_info_zt`` table.

     First caches ``doi -> pub_year`` in ``self.dic`` for every book of the
     ``aiaabook`` MySQL database, where the DOI is ``10.2514/`` plus the
     last path segment of the book URL. Then calls ``self.predb3()`` and
     feeds the result of ``parse_detail_one(..., 'zt')`` for every file
     under ``detail_path`` to ``utils.parse_results_to_sql`` on
     ``self.conn``. Finally closes ``self.conn`` and sends a completion
     notice via ``utils.msg2weixin``.

     :param message: not used by this method.
     """
     mysql_conn = utils.init_db('mysql', 'aiaabook', 2)
     mysql_cur = mysql_conn.cursor()
     mysql_cur.execute('select url,pub_year from book')
     books = mysql_cur.fetchall()
     for url, pub_year in books:
         book_code = url.split('/')[-1]
         self.dic['10.2514/' + book_code] = pub_year
     mysql_cur.close()
     mysql_conn.close()
     self.predb3()
     self.sqlList.clear()
     stmt = """insert or ignore into modify_title_info_zt(lngid, rawid, creator, title, identifier_pisbn,
      identifier_eisbn, description, publisher,cover,title_series,
      date,date_created, price, language, country, provider, provider_url, identifier_doi, provider_id,
     type,medium, batch) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
     """
     inserted = 0
     report = '%s: 插入 %d 条数据到db3'
     for filename, fullname in utils.file_list(self.detail_path):
         record = self.parse_detail_one(filename, fullname, 'zt')
         if record:
             self.sqlList.append(record)
         # presumably writes only once 50 records are buffered and
         # returns truthy when it did - confirm in utils.
         flushed = utils.parse_results_to_sql(self.conn, stmt, self.sqlList,
                                              50)
         if flushed:
             inserted += len(self.sqlList)
             utils.printf(report % (self.provider, inserted))
             self.sqlList.clear()
     # Write whatever is left over from the last partial batch.
     utils.parse_results_to_sql(self.conn, stmt, self.sqlList)
     inserted += len(self.sqlList)
     utils.printf(report % (self.provider, inserted))
     self.conn.close()
     self.conn = None
     utils.msg2weixin('%s: 解析完成,成品文件为%s' %
                      (self.provider, self.template_file))
Ejemplo n.º 20
0
    def generateFrequencyMatrix(self):
        """Build the attribute-by-class contingency table of ``sorted_data``.

        ``self.sorted_data`` must be loaded and have exactly two columns
        (attribute, class); otherwise an error is printed and nothing is
        changed. On success the method sets

        * ``frequency_matrix`` - float array with one row per unique
          attribute value and one column per unique class, holding the
          number of data rows with that combination,
        * ``frequency_matrix_intervals`` - the sorted unique attribute values,
        * ``nclasses`` and ``degrees_freedom`` (``nclasses - 1``),

        and finally calls ``self.printInitialSummary()``.
        """
        if self.sorted_data is None:
            utils.printf('ERROR: Your (sorted) data should be loaded!')
            return
        if self.sorted_data.shape[1] != 2:
            utils.printf(
                'ERROR: Your (sorted) matrix should have 2 columns only (attribute, class)'
            )
            return

        # return_inverse yields, for every data row, the index of its value
        # in the sorted unique array - i.e. its matrix row / column.
        unique_attribute_values, attribute_idx = np.unique(
            self.sorted_data[:, 0], return_inverse=True)
        unique_class_values, class_idx = np.unique(
            self.sorted_data[:, 1], return_inverse=True)
        self.frequency_matrix = np.zeros(
            (len(unique_attribute_values), len(unique_class_values)))
        self.frequency_matrix_intervals = unique_attribute_values
        self.nclasses = len(unique_class_values)
        self.degrees_freedom = self.nclasses - 1

        # One pass over the data: add 1 to the cell of each row's
        # (attribute, class) pair. np.add.at accumulates repeated indices,
        # replacing the former full rescan per matrix cell.
        np.add.at(self.frequency_matrix,
                  (np.ravel(attribute_idx), np.ravel(class_idx)), 1)
        self.printInitialSummary()
Ejemplo n.º 21
0
 def down_cover(self, message):
     """Download one cover from arc.aiaa.org into ``<cover_path>/10.2514/``.

     The file is named after the last path segment of the book URL. If it
     already exists, or after ``utils.Img2Jpg`` reports success,
     ``process_cover`` is reported as finished for the URL. A failed
     request (traceback printed) or a failed conversion re-queues the same
     message.

     :param message: sequence whose first item is the book URL and second
         item the cover path relative to ``https://arc.aiaa.org``.
     """
     HEADER = {
         'User-Agent':
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
     }
     url, cover_url = message[0], message[1]
     book_code = url.split('/')[-1]
     filename = '%s/10.2514/%s.jpg' % (self.cover_path, book_code)
     if os.path.exists(filename):
         self.senddistributefinish('process_cover', url)
         return
     try:
         proxy = self.getproxy()
         resp = requests.get('https://arc.aiaa.org' + cover_url,
                             headers=HEADER,
                             timeout=20,
                             proxies={'http': proxy, 'https': proxy})
     except:
         print('* ' + traceback.format_exc())
         self.sendwork('down_cover', message)
         return
     if not utils.Img2Jpg(resp.content, filename):
         self.sendwork('down_cover', message)
         return
     utils.printf('下载图片%s成功' % filename)
     self.senddistributefinish('process_cover', url)
Ejemplo n.º 22
0
def downpage(url, proxy):
    """Download one issue page (with ``?size=all``) through ``proxy``.

    The page is saved below ``E:\\lqx\\AIP\\issue`` in a directory named
    after the third URL segment from the end, as
    ``<segment -2>_<segment -1>.html``.

    :param url: issue URL without query string.
    :param proxy: proxy address, inserted into ``http://{}`` / ``https://{}``.
    :return: ``True`` when the file already exists or was written; ``-1``
        when the request raised, returned a falsy response, or the text
        lacks ``'Table of Contents'`` or ``'</html>'``.
    """
    segments = url.split('/')
    index_dir = r'E:\lqx\AIP\issue' + '/' + segments[-3]
    index_file = index_dir + '/' + segments[-2] + '_' + segments[-1] + '.html'
    if os.path.exists(index_file):
        return True
    url = url + '?size=all'
    proxies = {
        "http": "http://{}".format(proxy),
        "https": "https://{}".format(proxy)
    }
    feature = 'Table of Contents'
    try:
        resp = utils.get_html(url, feature=feature, proxies=proxies)
    except:
        return -1
    # A usable page must contain the feature text and a closing html tag.
    if (not resp or resp.text.find(feature) < 0
            or resp.text.find('</html>') < 0):
        return -1

    if not os.path.exists(index_dir):
        os.makedirs(index_dir)

    page = resp.content.decode('utf8')
    with open(index_file, mode='w', encoding='utf8') as out:
        out.write(page)
    utils.printf('下载', url, '成功...')
    return True
Ejemplo n.º 23
0
 def startdown_html(self, message):
     """Fetch page 1 of a listing and queue ``down_html`` for pages 2..N.

     Page 1 of ``https://www.ydylcn.com/<infotype>/index.shtml`` is read
     from ``<html_path>/<infotype>/1.html`` when that file exists;
     otherwise it is requested until ``self.gethtml`` returns a truthy
     response and then saved there. The total page count N is extracted
     from the text of the second ``//table//tr/td`` text node.
     ``self.totalcount`` is set to ``N - 1``.

     :param message: the info type, used in the URL and the directory name.
     """
     infotype = message
     if not self.html_path:
         self.initpath()
     self.refreshproxypool()
     url = 'https://www.ydylcn.com/%s/index.shtml' % infotype
     feature = 'class="list-link-1"'
     fdir = '%s/%s' % (self.html_path, infotype)
     if not os.path.exists(fdir):
         os.makedirs(fdir)
     fname = '%s/1.html' % fdir
     utils.printf(fname)
     if os.path.exists(fname):
         with open(fname, encoding='utf8') as cached:
             text = cached.read()
     else:
         # Retry without limit until the first page could be fetched.
         resp = self.gethtml(url, feature)
         while not resp:
             resp = self.gethtml(url, feature)
         text = resp.content.decode('utf8')
         with open(fname, mode='w', encoding='utf8') as out:
             out.write(text)
     sel = Selector(text=text)
     pagetotalnum = sel.xpath('//table//tr/td/text()')[1].re(
         r'\s*/(.*)页')[0]
     self.count = 0
     last_page = int(pagetotalnum)
     self.totalcount = last_page - 1
     for page in range(2, last_page + 1):
         self.sendwork('down_html', (page, infotype))
Ejemplo n.º 24
0
def downcover():
    """Download every journal cover linked from the AIP start page.

    Covers are stored in a ``cover`` directory next to this file, named
    after the lower-cased last path segment of the cover link plus
    ``.jpg``; existing files are skipped. After each successful download
    the function sleeps 3 seconds.

    :return: ``False`` when the start page cannot be fetched or lacks
        ``</html>``; ``True`` otherwise, even if single covers failed.
    """
    url = 'https://aip.scitation.org'
    cover_dir_fullpath = os.path.dirname(os.path.abspath(__file__)) + '/cover'
    if not os.path.exists(cover_dir_fullpath):
        os.makedirs(cover_dir_fullpath)
    try:
        resp = utils.get_html(url)
    except Exception:
        return False
    if not resp:
        return False
    if resp.text.find('</html>') < 0:
        return False
    soup = BeautifulSoup(resp.content.decode('utf8'), 'lxml')
    divList = soup.select('div.publicationCoverImage')
    for divtag in divList:
        coverurl = url + divtag.a.img['src']
        covername = cover_dir_fullpath + '/' + divtag.a['href'].split(
            '/')[-1].lower() + '.jpg'
        if os.path.exists(covername):
            continue
        # A failed cover request used to crash the whole run (exception,
        # or attribute access on a falsy response); now only that cover
        # is skipped, mirroring the guard on the start page above.
        try:
            cover_resp = utils.get_html(coverurl)
        except Exception:
            cover_resp = None
        if not cover_resp:
            utils.printf('下载', covername, '失败...')
            continue
        if utils.Img2Jpg(cover_resp.content, covername):
            utils.printf('下载', covername, '成功...')
            time.sleep(3)

    return True
 def gethtml(self, url, feature=None, coverflag=False):
     """GET ``url`` with the shared session and validate the response.

     With ``coverflag`` set the response is returned unchecked. Otherwise,
     when the body contains ``'Just a moment...'`` the session is refreshed
     (or, if ``self.refreshflag`` is already set, this call waits until it
     is cleared) and the request is repeated once. The response is then
     rejected unless its status is 200, its body contains ``</html>`` and,
     if given, ``feature``.

     :param url: address to request.
     :param feature: optional text that must occur in the body.
     :param coverflag: skip all validation when true.
     :return: the response object, or ``False`` on any failed check or
         exception.
     """
     request_args = dict(headers=self.headers,
                         timeout=20,
                         proxies=self.proxies)
     try:
         resp = self.session.get(url, **request_args)
         if coverflag:
             return resp
         if resp.content.decode('utf-8').find('Just a moment...') > 0:
             utils.printf('Just a moment...')
             if self.refreshflag:
                 # Wait in 1-second steps until refreshflag is cleared.
                 while self.refreshflag:
                     time.sleep(1)
             else:
                 self.refreshflag = True
                 self.refreshsession()
             resp = self.session.get(url, **request_args)
         if resp.status_code != 200:
             print('code !=200')
             return False
         body = resp.content.decode('utf-8')
         if body.find('</html>') < 0:
             print('not endwith </html>')
             return False
         if feature and body.find(feature) < 0:
             print('can not find feature')
             utils.logerror(url)
             return False
     except:
         return False
     return resp
Ejemplo n.º 26
0
def parse_list():
    """Parse every saved list page under ``list_path`` into the ``detail`` table.

    For each file the subject is taken from the directory name, the page is
    read as gb18030 HTML, and one row per listed link
    (provider_subject, title, url, add_time, look_time) is handed to
    ``utils.parse_results_to_sql``. The connection is closed when the walk
    ends, including when it ends with an exception.
    """
    # Keyword arguments: positional connect() arguments are not accepted by
    # recent PyMySQL releases.
    conn = pymysql.connect(host=DBHOST, user=DBUSER, password=DBPWD,
                           database=DB)
    sql_in = "insert ignore into detail(provider_subject,title,url,add_time,look_time) values (%s,%s,%s,%s,%s)"
    # Example path: E:\work\美星外文\list\<subject>\2.html
    subject_re = re.compile(r"E:\\work\\美星外文\\list\\(.*?)\\")
    result = []
    try:
        for _, filedir in utils.file_list(list_path):
            utils.printf(filedir)
            provider_subject = subject_re.findall(filedir)[0]
            with open(filedir, mode='r', encoding='gb18030') as f:
                html = Selector(f.read(), 'html')
            # Run each XPath query once per page instead of once per row.
            list_urls = html.xpath(
                "//tr[@class='tdbg_leftall']/td/strong/a/@href").extract()
            titles = html.xpath(
                "//tr[@class='tdbg_leftall']/td/strong/a/text()").extract()
            add_times = html.xpath(
                "//tr[@class='tdbg_leftall']/td[3]/text()").extract()
            look_times = html.xpath(
                "//tr[@class='tdbg_leftall']/td[4]/text()").extract()
            for i, item in enumerate(list_urls):
                # Indexing (not zip) keeps the IndexError on ragged columns.
                title = titles[i].split("  ")[0]
                url = "http://202.207.22.13:100/" + item
                result.append((provider_subject, title, url, add_times[i],
                               look_times[i]))
            utils.parse_results_to_sql(conn, sql_in, result)
            print('插入', len(result), ' 个结果到数据库成功')
            result.clear()
    finally:
        conn.close()
 def startdown_list(self, message):
     """Queue a ``down_list`` job for every issue row with ``stat=0``.

     Resets the per-run counters, reads the pending issue URLs from the
     ``cambridgejournal`` database and sends one ``down_list`` work item
     (url, target file name) per row. When nothing is pending but list
     files already exist, ``parse_list`` is triggered instead.

     :param message: unused here.
     """
     utils.printf('%s:开始下载列表页...' % self.provider)
     if not self.list_path:
         self.initpath()
     self.sqlList.clear()
     self.refreshproxypool()
     self.count = 0
     conn = utils.init_db('mysql', 'cambridgejournal')
     try:
         cur = conn.cursor()
         cur.execute('select url,stat from issue where stat=0')
         rows = cur.fetchall()
     finally:
         # The rows are already in memory; do not leak the connection.
         conn.close()
     self.totalcount = len(rows)
     if self.totalcount == 0:
         if len(os.listdir(self.list_path)) == 0:
             utils.logerror('%s:没有新的issue不需要更新' % self.provider)
         else:
             # self.sendwork('down_cover')
             self.sendwork('parse_list')
             return
     self.refreshsession()
     for url, _ in rows:
         # One sub-directory per third-from-last URL segment.
         fdir = self.list_path + '/' + url.split('/')[-3]
         if not os.path.exists(fdir):
             os.makedirs(fdir)
         flast = url.split('/')[-1]
         if flast.find('?pageNum=') > 0:
             # "<name>?pageNum=<n>" becomes "<name>_<n>" in the file name.
             flast = flast.split('?')[0] + '_' + flast.split('=')[-1]
         fname = fdir + '/' + flast + '.html'
         self.sendwork('down_list', (url, fname))
Ejemplo n.º 28
0
	def actTryTargetLogin(self, objBrowser, tryUsername, tryPassword, currentTry):
		"""Submit one username/password pair through the login form.

		The pair is recorded in ``self.credentials`` and ``True`` is returned
		when the reloaded page yields no form information; otherwise
		``False``. A ``mechanize.HTTPError`` is printed and ends the process
		with exit status 1.
		"""
		try:
			# Populate the login form.
			objBrowser.select_form(nr = self.formLoginID)
			objBrowser.form[self.formUsernameField] = tryUsername
			objBrowser.form[self.formPasswordField] = tryPassword

			# Progress line.
			progress = "%10s : %20s%12s%10s / %10s" % (
				tryUsername, tryPassword, '=' * 6, currentTry, self.sizePasslist)
			utils.prints(progress)

			# Send the request, then reload to follow a post-login redirect.
			objBrowser.submit()
			objBrowser.reload()

			# A page that still carries a login form means the attempt failed
			# (heuristic flagged as needing improvement by the original author).
			remaining_forms = actions.action_getFormInformation(objBrowser.forms())
			if remaining_forms:
				return False

			utils.printf("Found: %s:%s" %(tryUsername, tryPassword), "good")
			self.credentials.append([tryUsername, tryPassword])
			return True

		except mechanize.HTTPError as error:
			utils.printf(error, "bad")
			sys.exit(1)
Ejemplo n.º 29
0
 def startdown_list(self, message):
     """Queue a ``down_list`` job for every issue row with ``stat=0``.

     Resets the per-run counters, reads the pending issue URLs from the
     ``aiaajournal`` database and sends one ``down_list`` work item
     (url, target file name) per row. When nothing is pending but list
     files already exist, ``parse_list`` is triggered.

     :param message: unused here.
     """
     utils.printf('%s:开始下载列表页...' % self.provider)
     if not self.list_path:
         self.initpath()
     self.refreshproxypool()
     self.sqlList.clear()
     self.count = 0
     conn = utils.init_db('mysql', 'aiaajournal', 2)
     try:
         cur = conn.cursor()
         cur.execute('select url,stat from issue where stat=0')
         rows = cur.fetchall()
     finally:
         # The rows are already in memory; do not leak the connection.
         conn.close()
     self.totalcount = len(rows)
     if self.totalcount == 0:
         if len(os.listdir(self.list_path)) == 0:
             utils.logerror('%s:没有新的issue不需要更新' % self.provider)
         else:
             self.sendwork('parse_list')
     for url, _ in rows:
         urlsp = url.split('/')
         # File: "<second-to-last>_<last>.html" inside a directory named
         # after the third-from-last URL segment.
         base_name = '%s_%s.html' % (urlsp[-2], urlsp[-1])
         fdir = '%s/%s' % (self.list_path, urlsp[-3])
         fname = '%s/%s' % (fdir, base_name)
         if not os.path.exists(fdir):
             os.makedirs(fdir)
         self.sendwork('down_list', (url, fname))
Ejemplo n.º 30
0
 def process_list(self, message):
     """Record one downloaded list page and flush status updates in batches.

     :param message: the issue URL whose ``stat`` is to be set to 1.

     Pending UPDATE statements are written whenever ``count % 40 == 1`` and
     once more when ``count`` reaches ``totalcount``, after which
     ``parse_list`` is triggered. The proxy pool is refreshed every 100
     pages.
     """
     def flush_pending():
         # Execute and commit every queued statement, then empty the queue.
         conn = utils.init_db('mysql', 'aiaajournal', 2)
         cur = conn.cursor()
         for statement in self.sqlList:
             cur.execute(statement)
         conn.commit()
         conn.close()
         self.sqlList.clear()

     self.count = self.count + 1
     self.sqlList.append(
         "update issue set stat=1 where url='{}'".format(message))

     if self.count % 40 == 1:
         utils.printf('%s:下载成功 %s 页' % (self.provider, self.count))
         flush_pending()

     if self.count % 100 == 0:
         self.refreshproxypool()

     if self.count == self.totalcount:
         flush_pending()
         utils.printf('downloadlist finish')
         self.sendwork('parse_list')
Ejemplo n.º 31
0
    def _continue(self, smallest):
        """Report both continuation criteria and return their disjunction.

        :param smallest: value forwarded to ``self._more_merges``.
        :return: ``self._too_many_intervals() or self._more_merges(smallest)``
            (both are always evaluated).
        """
        too_many = self._too_many_intervals()
        can_merge = self._more_merges(smallest)

        report = (
            ('- Too many intervals?: {}', too_many),
            ('- Can we merge more?: {}', can_merge),
        )
        for template, flag in report:
            utils.printf(template.format(flag))

        return too_many or can_merge
Ejemplo n.º 32
0
    def chi2(self):
        """Run phase 1 and then phase 2 on the loaded data matrix.

        Phase 1 defines sigLevel values for every numeric attribute and runs
        chimerge for every attribute column; its result is passed to
        phase 2, which merges attributes if needed. Prints an error and does
        nothing when no data matrix has been loaded.
        """
        if self.data is not None:
            self._phase2(self._phase1())
            return
        utils.printf('ERROR: Your data matrix should be loaded!')
Ejemplo n.º 33
0
 def loadFrequencyMatrix(self, frequency_matrix, unique_attribute_values):
     '''
     Install a precomputed frequency matrix and print the initial summary.

     :param frequency_matrix: numpy array (one column per class)
     :param unique_attribute_values: interval labels matching the matrix rows
     :return: void
     '''
     # ``np.array`` is a factory function, not a type, so comparing
     # ``type(x)`` against it rejected every input; test the real class.
     if not isinstance(frequency_matrix, np.ndarray):
         utils.printf('ERROR: data must be a numpy.array')
         return
     self.frequency_matrix = frequency_matrix
     self.frequency_matrix_intervals = unique_attribute_values
     self.nclasses = self.frequency_matrix.shape[1]
     self.degrees_freedom = self.nclasses - 1
     self.printInitialSummary()
Ejemplo n.º 34
0
def joinscandirlbname(lbnamemap,scandir_list):
    """Group scanned files by ``aid`` and attach the name found for each ``lid``.

    :param lbnamemap: mapping ``lid -> row``; ``row[2]`` is used as the name.
    :param scandir_list: iterable of ``(date, aid, lid, file_path)`` tuples.
    :return: dict ``aid -> list`` of dicts with the keys ``file_path``,
        ``aid``, ``lid``, ``lname`` and ``date``. Entries whose ``lid`` is
        missing from ``lbnamemap`` are skipped with a warning.
    """
    out = {}
    for (date, aid, lid, file_path) in scandir_list:
        # ``in`` works on Python 2 and 3; ``dict.has_key`` is Python 2 only.
        if lid not in lbnamemap:
            utils.printf("Warning lid[%i] was not found in lbmap\n",lid)
            continue
        dict_row = {"file_path": file_path,
                    "aid": aid,
                    "lid": lid,
                    "lname": lbnamemap[lid][2],
                    "date": date}
        out.setdefault(aid, []).append(dict_row)
    return out
Ejemplo n.º 35
0
    def loadData(self, data, issorted=False):
        '''
        Store ``data`` and keep a copy ordered by the attribute column.

        :param data: numpy matrix or array with two columns (index 0: attribute, index 1: class)
        :param issorted: boolean, if data is already sorted, no need to sort again (based on attribute_column)
        :return: void
        '''
        # ``np.array`` is a function, so the former ``type(data) != np.array``
        # test was always true and plain ndarrays were rejected. np.matrix is
        # an ndarray subclass, so one isinstance check covers both.
        if not isinstance(data, np.ndarray):
            utils.printf('ERROR: data must be a numpy.matrix or numpy.array')
            return

        self.data = data # numpy.matrix (x,2). column index 0 refers to attributes column and index 1 classes
        if not issorted:
            # Each row is viewed as one two-field record so whole rows are
            # sorted by the first field. NOTE(review): the 'i8,i8' view
            # assumes 8-byte items and two columns - confirm with callers.
            # ``np.float64`` replaces the ``np.float`` alias, which recent
            # NumPy releases no longer provide.
            self.sorted_data = np.array(np.sort(data.view('i8,i8'), order=['f0'], axis=0).view(np.float64))
        else:
            self.sorted_data = np.array(data)
        utils.printf('Sorted data: matrix {}x{}'.format(self.sorted_data.shape[0],self.sorted_data.shape[1]))
Ejemplo n.º 36
0
 def loadData(self, data):
     '''
     Load the data matrix and reset all per-attribute bookkeeping.

     :param data: numpy matrix or array; the last column holds the class label
     :return: void
     '''
     # ``np.array`` is a function, so the former ``type(data) != np.array``
     # test was always true and plain ndarrays were rejected. np.matrix is
     # an ndarray subclass, so one isinstance check covers both.
     if not isinstance(data, np.ndarray):
         utils.printf('ERROR: data must be a numpy.matrix or numpy.array')
         return
     self.data = np.array(data) # no need to sort at this point
     self.nattributes = self.data.shape[1]-1 # last column refers to class label
     self.nclasses = np.unique(self.data[:,self.nattributes]).shape[0]
     self.degrees_freedom = self.nclasses - 1
     # One slot per attribute column, all starting in the "not processed,
     # still mergeable" state.
     self.chimerge_per_column = {colid:None for colid in range(self.nattributes)}
     self.alpha_per_column = {colid:None for colid in range(self.nattributes)}
     self.attribute_can_be_merged = {colid:True for colid in range(self.nattributes)}
     utils.printf('Data: matrix {}x{} ({} numeric attributes)'.format(self.data.shape[0],self.data.shape[1], self.nattributes))
     self._loadChiDistribution()
Ejemplo n.º 37
0
    def generateFrequencyMatrix(self):
        """Build the attribute-by-class contingency table from ``self.sorted_data``.

        Sets ``frequency_matrix`` (rows: unique attribute values, columns:
        unique class values, cells: number of matching instances),
        ``frequency_matrix_intervals``, ``nclasses`` and ``degrees_freedom``,
        then prints the initial summary. Prints an error and returns when the
        sorted data is missing or does not have exactly two columns.
        """
        if self.sorted_data is None:
            utils.printf('ERROR: Your (sorted) data should be loaded!')
            return
        if self.sorted_data.shape[1] != 2:
            utils.printf('ERROR: Your (sorted) matrix should have 2 columns only (attribute, class)')
            return

        # Map every instance to its row (attribute value) and column (class).
        unique_attribute_values, row_of = np.unique(self.sorted_data[:,0], return_inverse=True)
        unique_class_values, col_of = np.unique(self.sorted_data[:,1], return_inverse=True)

        self.frequency_matrix = np.zeros((len(unique_attribute_values), len(unique_class_values)))
        self.frequency_matrix_intervals = unique_attribute_values   # first intervals: unique attribute values
        self.nclasses = len(unique_class_values)                    # number of classes
        self.degrees_freedom = self.nclasses - 1                    # degrees of freedom (look at table)

        # Count every (attribute, class) occurrence in one pass; np.add.at
        # accumulates repeated index pairs, unlike plain fancy-index "+=".
        np.add.at(self.frequency_matrix, (row_of, col_of), 1)
        self.printInitialSummary()
Ejemplo n.º 38
0
    def _inConsistency(self):
        """Return an inconsistency rate computed from ``self.data``.

        Intended algorithm, as stated by the original author:
          1. take the matrix of all attribute columns (class column excluded);
          2. find duplicates (register indexes);
          3. for every duplicated instance compute
             ``inconsistency_count = n - ck``, where ``n`` is how often the
             instance is duplicated and ``ck`` the largest number of its
             duplicates within a single class;
          4. sum all counts and divide by the total number of instances.

        Known limitation (author's note): this should run over the merged
        data, not the raw data - still to be fixed.

        :return: the rate as a float, or ``None`` (after printing an error)
            when no data matrix is loaded.
        """
        #1. matrix with all attribute-columns (except class-column)
        #2. find duplicates (register indexes)
        #3. for every duplicated instance do:
        #   3.1. calculate inconsistency_count = (n-ck) where n is the number of time such instance is duplicated and ck the largest number of duplicates of such instance among all classes
        #4. inconsistency rate: sum all inconsistency_count and divide by the number of instances (total instances)

        #
        # IT SHOULD NOT BE OVER RAW DATA, BUT OVER THE MERGED DATA!!!
        # To be fixed!
        #
        if self.data is None:
            utils.printf('ERROR: Your data matrix should be loaded!')
            return

        # 1. matrix with only attribute values
        # 2. identify duplicates
        # NOTE(review): np.unique is called without ``axis``, so it works on
        # the flattened values rather than on whole rows, and the slice stops
        # at column ``nattributes-1`` - confirm both are intended.
        unique_values, unique_indexes = np.unique(self.data[:,:self.nattributes-1], return_inverse=True)
        unique_counts = np.bincount(unique_indexes)
        matching_instances = unique_values[unique_counts>1]
        sum_inconsistencies = 0
        total_instances = unique_indexes.shape[0]

        # 3. calculating inconsistency_count for every instance
        for matching_instance in matching_instances:
            c = {}
            for colid in range(self.nclasses):
                # Rows whose column ``nattributes`` equals ``colid`` are
                # selected, then every cell equal to the value is counted.
                c[colid] = (self.data[self.data[:,self.nattributes]==colid] == matching_instance).sum()
            n = sum(c.values())
            cmax = max(c.values())
            inconsistency_count = n - cmax
            sum_inconsistencies += inconsistency_count

        # 4. inconsistency rate
        inconsistency_rate = sum_inconsistencies / float(total_instances)
        return inconsistency_rate
Ejemplo n.º 39
0
def main(args):
    prog = args[0]
    if len(args) < 3:
        usage(prog)
        sys.exit()
    conf_file = []
    conf_file_a = args[1]
    conf_file_b = args[2]
    printf("Enter username: "******"Enter password: "******"Scanning files %s and %s\n", conf_file_a, conf_file_b)
    ldap_a = utils.LdapConnecter(user, passwd, conf_file=conf_file_a)
    ldap_b = utils.LdapConnecter(user, passwd, conf_file=conf_file_b)
    printf("binding to ldap from conf %s\n", conf_file_a)
    ldap_a.bind()
    printf("binding to ldap from conf %s\n", conf_file_b)
    ldap_b.bind()
    printf("\n")
    ldap_groups = [v for (k,v) in ldap_a.conf["roles"].items()]
    for ldap_group in ldap_groups:
        printf("ldap group %s\n", ldap_group)
        printf("----------------------------------------------\n")
        printf("\n")
        members_a = ldap_a.get_group_members_ssos(ldap_group)
        members_b = ldap_b.get_group_members_ssos(ldap_group)
        a_ssos = set(members_a.keys())
        b_ssos = set(members_b.keys())
        n_common = len(a_ssos & b_ssos)
        printf("common member count: %d\n", n_common )

        printf("    missing from %s:\n", conf_file_a)
        missing_from_a = sorted(list(b_ssos - a_ssos))
        if len(missing_from_a) > 0:
            max_col_len = max([len(sso) for sso in missing_from_a])
            for sso in missing_from_a:
                lsso = "%s" %((sso + ":").ljust(max_col_len +2),)
                printf("    %s%s\n", lsso, members_b[sso])
        else:
            printf("    None\n")
        printf("\n")
        printf("    missing from %s:\n", conf_file_b)
        missing_from_b = sorted(list(a_ssos - b_ssos))
        if len(missing_from_b) > 0:
            max_col_len = max([len(sso) for sso in missing_from_b])
            for sso in missing_from_a:
                lsso = "%s" % ((sso + ":").ljustt(max_col_len +2),)
                printf("    %s%s\n", lsso, members_a[sso])
        else:
            printf("    None\n")
        printf("\n")

    printf("\n")
    ldap_a.unbind()
    ldap_b.unbind()
Ejemplo n.º 40
0
def usage(prog):
    """Print the command-line help for the two-configuration script.

    :param prog: program name shown in the usage line.
    """
    printf("usage is %s <conf_file1> <conf_file2>\n", prog)
    # NOTE(review): "\b" is a backspace; possibly "\n" was intended.
    for text in ("\b",
                 "list the members of the specified group name\n",
                 "\n"):
        printf(text)
Ejemplo n.º 41
0
import sys
import os

def usage(prog):
    """Print the command-line help for the group/role lookup script.

    :param prog: program name shown in the usage line.
    """
    printf("usage is %s <conf_file>\n", prog)
    for text in ("get the user group and roles based on the conf file\n",
                 "You will be prompted for your SSO user and passwd\n",
                 "\n"):
        printf(text)

if __name__ == "__main__":
    # Script entry point: expects the configuration file as the only argument
    # and prints the groups and roles of the prompted user.
    prog = sys.argv[0]
    if len(sys.argv) < 2:
        usage(prog)
        sys.exit() 
    conf_file = sys.argv[1]
    # NOTE(review): the next line appears to have been mangled by a
    # credential-masking filter; the statements that define ``user``,
    # ``groups`` and ``roles`` are missing from this snippet.
    printf("Enter username: "******"Enter password: "******"User %s is a member of %d groups\n", user, len(groups))
    printf("--------------------------------------------------------\n")
    # Each group is a (cn, dn) pair; only the dn is printed.
    for (group_cn, group_dn) in groups:
        printf("%s\n", group_dn)
    printf("\n")
    printf("User %s has %d roles\n", user, len(roles))
    printf("--------------------------------------------------------\n")
    # NOTE(review): the loop body is cut off in this snippet.
    for role in roles:
Ejemplo n.º 42
0
def usage(prog):
    """Print the command-line help for the group/role lookup script.

    :param prog: program name shown in the usage line.
    """
    printf("usage is %s <conf_file>\n", prog)
    for text in ("get the user group and roles based on the conf file\n",
                 "You will be prompted for your SSO user and passwd\n",
                 "\n"):
        printf(text)
Ejemplo n.º 43
0
    def chimerge(self):
        """Repeatedly merge the adjacent intervals with the smallest chi-square value.

        Each round scores every pair of neighbouring rows of
        ``self.frequency_matrix`` with ``self.chisqrtest`` and, while
        ``self._more_merges`` allows it, folds the upper row of every
        lowest-scoring pair into its lower row. The loop ends when
        ``self._too_many_intervals()`` is false or no further merge is
        allowed. The scores of the last round are kept in
        ``self.chitestvalues``.
        """
        if self.frequency_matrix is None:
            utils.printf('ERROR: Your frequency matrix should be loaded!')
            return

        chitest = {}
        round_no = 0
        smallest = -1

        while self._too_many_intervals():

            # --- chi-square test for every adjacent pair -------------------
            chitest = {}
            shape = self.frequency_matrix.shape
            for lower_row in range(shape[0] - 1):
                pair = (lower_row, lower_row + 1)
                score = self.chisqrtest(self.frequency_matrix[[pair],:][0])
                chitest.setdefault(score, []).append(pair)
            smallest = min(chitest.keys())
            biggest = max(chitest.keys())

            # --- round summary ---------------------------------------------
            round_no += 1
            utils.printf('')
            utils.printf('ROUND {}: {} intervals. Chi min:{}, Chi max:{}'.format(round_no, self.frequency_matrix.shape[0], smallest, biggest))
            utils.printf('CHI2 VALUES: {}'.format(chitest.keys()))

            # --- merge ------------------------------------------------------
            if not self._more_merges(smallest):
                break

            utils.printf('MERGING INTERVALS: chi {} -> {}'.format(smallest, chitest[smallest]))
            # Walk the pairs from the bottom up so deleting a row does not
            # shift the indexes of the pairs still to be processed.
            for lower, upper in reversed(chitest[smallest]):
                # Add the frequencies of the row being removed to the row
                # that remains, then drop the row and its interval label.
                self.frequency_matrix[lower,:] += self.frequency_matrix[upper,:]
                self.frequency_matrix = np.delete(self.frequency_matrix, upper, 0)
                self.frequency_matrix_intervals = np.delete(self.frequency_matrix_intervals, upper, 0)
            utils.printf('NEW INTERVALS: ({}):{}'.format(len(self.frequency_matrix_intervals),self.frequency_matrix_intervals))

        self.chitestvalues = chitest
        utils.printf('END (chi {} > {})\n'.format(smallest, self.threshold))
Ejemplo n.º 44
0
 def printInitialSummary(self):
     """Print the round-0 report: classes, threshold, interval limits and the frequency matrix."""
     def report_lines():
         # Lazily produced so each value is read right before it is printed.
         yield ''
         yield 'ROUND 0: Initial values:'
         yield '- Number of classes: {}'.format(self.nclasses)
         yield '- Degrees of Freedom: {} (deprecated)'.format(self.degrees_freedom)
         yield '- Threshold: {}'.format(self.threshold)
         yield '- Max number of intervals: {}'.format(self.max_number_intervals)
         yield '- Number of (unique) intervals: {}'.format(len(self.frequency_matrix_intervals))
         yield '- Frequency matrix: {}x{} (sum {})'.format(self.frequency_matrix.shape[0], self.frequency_matrix.shape[1], self.frequency_matrix.sum())
         yield '- Intervals: {}'.format(self.frequency_matrix_intervals)

     for line in report_lines():
         utils.printf(line)
Ejemplo n.º 45
0
 def printInitialSummary(self):
     """Print the round-0 report: attribute/class counts, alpha and delta."""
     def report_lines():
         # Lazily produced so each value is read right before it is printed.
         yield ''
         yield 'ROUND 0: Initial values:'
         yield '- Number of attributes: {}'.format(self.nattributes)
         yield '- Number of classes: {}'.format(self.nclasses)
         yield '- Degrees of Freedom: {} (deprecated)'.format(self.degrees_freedom)
         yield '- alpha (initial value of sigLevel): {}'.format(self.alpha)
         yield '- delta (inConsistency level): {}'.format(self.delta)

     for line in report_lines():
         utils.printf(line)
Ejemplo n.º 46
0
 def flush_row(self, row):
     """Report a flush request for ``row`` through ``printf``; returns ``None``."""
     message = "flush-row: %d"
     printf(message, row)
Ejemplo n.º 47
0
 def put_shape(self, location, shape_vector):
     """Report a shape placement through ``printf``; returns ``None``.

     ``location`` is indexed as ``[row, col]``.
     """
     message = "put-shape: location: [row: %d, col: %d], shape: %s"
     printf(message, location[0], location[1], shape_vector)
Ejemplo n.º 48
0
 def put_center_string(self, row, display_string):
     """Report a centered-string request for ``row`` through ``printf``; returns ``None``."""
     message = "put-center-string: [row: %d, display-string: %s]"
     printf(message, row, display_string)
Ejemplo n.º 49
0
 def printFinalSummary(self):
     """Print the final intervals, the chi-square value per interval pair and the frequency matrix."""
     utils.printf('FINAL SUMMARY')
     utils.printf('Intervals: {}'.format(self.frequency_matrix_intervals))

     # One "[lower-upper):chi" entry per chi value, using the first pair
     # recorded for that value.
     chi_entries = []
     for chi, pairs in utils.sortDictByValue(self.chitestvalues,False):
         chi_entries.append('[{}-{}):{:5.1f}'.format(pairs[0][0], pairs[0][1], chi))
     utils.printf('Chi2: {}'.format(', '.join(chi_entries)))

     n_rows = self.frequency_matrix.shape[0]
     n_cols = self.frequency_matrix.shape[1]
     utils.printf('Interval-Class Frequencies ({}x{})\n{}'.format(n_rows, n_cols, self.frequency_matrix))
Ejemplo n.º 50
0
    lbRows = getLidAidNameFromLbs(con,cur)
    lb2name = getLbid2name(lbRows)
    cur.close()
    con.close()
    scan_list = scandir_filter(scandir(cache_dir),**kw)
    cfFiles = joinscandirlbname(lb2name,scan_list)
    return (scan_list,cfFiles)

def main(args):
    """Turn ``key=value`` command-line arguments into keyword arguments for ``getCfFiles``.

    :param args: argv-style list; ``args[0]`` (the program name) is ignored.
    :return: whatever ``getCfFiles(**kw)`` returns.

    Values that parse as integers are passed as ``int``; everything else is
    passed as a stripped string.
    """
    kw = {}
    for arg in args[1:]:
        # Split on the first "=" only, so a value may itself contain "=".
        (k, v) = arg.split("=", 1)
        key = k.strip()
        try:
            kw[key] = int(v.strip())
        except ValueError:
            kw[key] = v.strip() #if its not an int treat it like a string
    return getCfFiles(**kw)

if __name__ == "__main__":
    # Script entry point: print every scanned entry, then every file record
    # grouped by aid.
    (scan_list,cfFiles) = main(sys.argv)
    # enumerate() replaces the Python-2-only xrange() index loop.
    for i, scan_entry in enumerate(scan_list):
        utils.printf("scan_list[%i]=%s\n",i,scan_entry)
    for (aid,files) in cfFiles.items():
        for file_info in files:
            utils.printf("%s: %s\n",aid,file_info)

#example invocation
#./cfupload.py aid=452605 date_gte=2013060600
#./cfupload.py aid=452605
#/cfupload.py aid=682644 date_gte=2013052500 date_lte=2013052523
Ejemplo n.º 51
0
import os

def usage(prog):
    """Print the command-line help for the group-members script.

    :param prog: program name shown in the usage line.
    """
    printf("usage is %s <conf_file> <group_name>\n", prog)
    # NOTE(review): "\b" is a backspace; possibly "\n" was intended.
    for text in ("\b",
                 "list the members of the specified group name\n",
                 "\n"):
        printf(text)

if __name__ == "__main__":
    # Script entry point: expects a configuration file and a group name and
    # prints the members of that group.
    prog = sys.argv[0]
    if len(sys.argv) < 3:
        usage(prog)
        sys.exit() 
    conf_file = sys.argv[1]
    group_name = sys.argv[2]
    # NOTE(review): the next line appears to have been mangled by a
    # credential-masking filter; the statements that define ``ssos`` and
    # ``n_groups`` are missing from this snippet.
    printf("Enter username: "******"Enter password: "******"group %s has %d members\n", group_name, n_groups);
    printf("-----------------------------------------------------------\n")
    # Width of the longest SSO, used to align the label column.
    llen = max([len(sso) for sso in ssos])
    for sso in ssos:
        # "<sso>:" left-justified to the common width plus padding.
        # NOTE(review): the rest of the loop body is cut off in this snippet.
        lsso = "%s" % ((sso + ":").ljust(llen + 2),)
Ejemplo n.º 52
0
 def put_string(self, location, display_string):
     """Report a string placement through ``printf``; returns ``None``.

     ``location`` is indexed as ``[row, col]`` and ``display_string`` as
     ``[fmt, str]``.
     """
     message = "put-string: location: [row: %d, col: %d], display-string: [fmt: %s, str: %s]"
     printf(message,
            location[0], location[1],
            display_string[0], display_string[1])
Ejemplo n.º 53
0
def execute(frame, bc):
    """Interpret the bytecode of ``bc`` using ``frame`` for stack and variables.

    Instructions are two characters wide: an opcode followed by one argument
    character. Execution ends on a RETURN opcode (yielding the popped value,
    or ``W_Null()`` when ``frame.valuestack_pos`` is not positive) or when
    the program counter runs past the end of the code (yielding ``W_Null()``).

    :param frame: execution frame providing push/pop, ``vars`` and the
        argument stack.
    :param bc: bytecode object providing ``code``, ``constants``,
        ``functions`` and ``numvars``.
    :raises Exception: on an opcode that no branch handles.
    """
    code = bc.code
    pc = 0
    while True:
        # required hint indicating this is the top of the opcode dispatch
        driver.jit_merge_point(pc=pc, code=code, bc=bc, frame=frame)

        if pc >= len(code):
            return W_Null()

        # Fetch opcode and argument, then advance past both.
        c = ord(code[pc])
        arg = ord(code[pc + 1])
        pc += 2
        if c == bytecode.LOAD_CONSTANT:
            w_constant = bc.constants[arg]
            frame.push(w_constant)
        elif c == bytecode.LOAD_VAR:
            frame.push(frame.vars[arg])
        elif c == bytecode.LOAD_NULL:
            frame.push(W_Null())
        elif c == bytecode.LOAD_BOOLEAN:
            frame.push(W_Boolean(bool(arg)))
        elif c == bytecode.LOAD_PARAM:
            frame.push_arg(frame.pop())  # push to the argument-stack
        elif c == bytecode.DISCARD_TOP:
            frame.pop()
        elif c == bytecode.RETURN:
            if frame.valuestack_pos > 0:
                return frame.pop()
            else:
                return W_Null()
        # Binary operators: the right operand is on top of the stack.
        elif c == bytecode.BINARY_ADD:
            right = frame.pop()
            left = frame.pop()
            w_res = left.add(right)
            frame.push(w_res)
        elif c == bytecode.BINARY_LT:
            right = frame.pop()
            left = frame.pop()
            frame.push(left.lt(right))
        elif c == bytecode.BINARY_GE:
            right = frame.pop()
            left = frame.pop()
            frame.push(left.ge(right))
        elif c == bytecode.BINARY_EQ:
            right = frame.pop()
            left = frame.pop()
            frame.push(left.eq(right))
        elif c == bytecode.BINARY_SUB:
            right = frame.pop()
            left = frame.pop()
            frame.push(left.sub(right))
        elif c == bytecode.BINARY_STRINGJOIN:
            right = frame.pop()
            left = frame.pop()
            frame.push(left.append(right))
        elif c == bytecode.JUMP_IF_FALSE:
            # The argument is the absolute target position.
            if not frame.pop().is_true():
                pc = arg
        elif c == bytecode.JUMP_BACKWARD:
            pc = arg
            # required hint indicating this is the end of a loop
            driver.can_enter_jit(pc=pc, code=code, bc=bc, frame=frame)
        elif c == bytecode.CALL:
            method = bc.functions[arg]
            method.body.globals = [None]*bc.numvars  # XXX

            # The callee runs in a fresh frame over its own bytecode.
            new_bc = method.body
            new_frame = Frame(new_bc)

            # reverse args index to preserve order
            for i in range(len(method.params)):
                index = len(method.params) - 1 - i
                new_frame.vars[index] = frame.pop_arg()

            res = execute(new_frame, new_bc)
            frame.push(res)
        elif c == bytecode.PRINT:
            item = frame.pop()
            printf(item.str())
        elif c == bytecode.ASSIGN:
            frame.vars[arg] = frame.pop()
        else:
            raise Exception("Unkown operation %s" % bytecode.bytecodes[c])
Ejemplo n.º 54
0
 def _loadChiDistribution(self):
     """Load the chi-square table from ``data/chisquare_distribution.data``.

     The file is parsed as JSON and stored in ``self.chidistribution`` with
     its keys converted to ``float``. A one-line summary is printed.
     """
     with open('data/chisquare_distribution.data','r') as f:
         data = json.load(f)
     self.chidistribution = {float(k):v for k,v in data.items()}
     # dict.values() cannot be indexed on Python 3; take the first row
     # through an iterator instead (an empty table is reported as 0).
     first_row = next(iter(self.chidistribution.values()), None)
     n_degrees = len(first_row)-1 if first_row is not None else 0
     utils.printf('ChiSquare distribution table loaded. {} sigLevel and {} degrees of freedom.'.format(len(self.chidistribution),n_degrees))