def _email_pattern(self, domain, api_key=""):
    ''' Score email pattern based on number of occurrences '''
    qry = {'where': json.dumps({'domain': domain}), 'limit': 1000}
    crawls = Parse().get('CompanyEmailPatternCrawl', qry)
    crawls = pd.DataFrame(crawls.json()['results'])
    df = crawls[crawls.pattern.notnull()].drop_duplicates('email')
    _df = df[df.crawl_source != "mx_check"]
    df = df[df.crawl_source == "mx_check"].drop_duplicates('pattern')
    if len(df.pattern) > 2:
        df = df[df.crawl_source != "mx_check"]
    df = _df.append(df)
    df = df.pattern.value_counts()
    score = pd.DataFrame()
    score['pattern'], score['freq'] = df.index, df.values
    score['score'] = [freq / float(score.freq.sum()) for freq in score['freq']]
    score['source'], score['tried'] = 'clearspark', False
    score = score.fillna("")
    score = score.to_dict('records')
    #print score, api_key
    print "SCORE"
    print score
    score = {'domain': domain, 'company_email_pattern': score}
    self._find_if_object_exists('EmailPattern', 'domain', domain, score)
    # TODO - add date crawled to score
    if RQueue()._has_completed("{0}_{1}".format(domain, api_key)):
        if score['company_email_pattern'] == []:
            score['email_guess'] = EmailGuess()._random()
            #q.enqueue(Sources()._jigsaw_search, domain)
        Webhook()._update_company_email_pattern(score)
def run(self):
    global args, events, hist
    p = Parse(args.infile, ('chan', 'raw'))
    data = None
    while True:
        if self._abort_flag:
            print("Worker aborted")
            return
        if self._pause_flag:
            time.sleep(0.1)
            continue
        try:
            data = p.next()
        except StopIteration:
            time.sleep(1)
            continue
        if self._notify_window.ready:
            self._notify_window.ready = False
            wx.PostEvent(self._notify_window, DataReadyEvent(data))
        events.append(data)
        #TODO:
        if hist:
            e_ts, e_chan, e_summ, e_baseline, e_dbaseline = integrate(data)[0:5]
            hist.append(e_summ)
def visualise_days(self):
    """Visualise data by day of the week."""
    data_parse = Parse()
    data_file = data_parse.main()
    # Returns a dict where it sums the total values for each key.
    # In this case, the keys are the DaysOfWeek, and the values are
    # a count of incidents.
    counter = Counter(item["DayOfWeek"] for item in data_file)
    # Separate out the counter to order it correctly when plotting.
    data_list = [counter["Monday"],
                 counter["Tuesday"],
                 counter["Wednesday"],
                 counter["Thursday"],
                 counter["Friday"],
                 counter["Saturday"],
                 counter["Sunday"]
                 ]
    day_tuple = tuple(["Mon", "Tues", "Wed", "Thurs", "Fri", "Sat", "Sun"])
    # Assign the data to a plot
    plt.plot(data_list)
    # Assign labels to the plot
    plt.xticks(range(len(day_tuple)), day_tuple)
    # Save the plot!
    plt.savefig("Days.png")
    # Close figure
    plt.clf()
def inbound_parse(): """Process POST from Inbound Parse and print received data.""" parse = Parse(config, request) # Sample processing action print(parse.key_values()) # Tell SendGrid's Inbound Parse to stop sending POSTs # Everything is 200 OK :) return "OK"
def check_if_email_pattern_exists(args):
    parse, google = Parse(), Google()
    domain = tldextract.extract(args['domain'])
    domain = "{}.{}".format(domain.domain, domain.tld)
    qry = json.dumps({'domain': domain})
    qry = {'where': qry, 'include': 'company_email_pattern'}
    pattern = parse.get('CompanyEmailPattern', qry).json()
def _email_crawl_pointers(self, qry):
    parse = Parse()
    results = parse.get('CompanyEmailPatternCrawl', qry).json()
    results = results['results'] if "results" in results.keys() else results
    crawls = pd.DataFrame(results)
    crawl_objectids = crawls.drop_duplicates('pattern').objectId
    crawl_pointers = [parse._pointer('CompanyEmailPatternCrawl', objectId)
                      for objectId in crawl_objectids]
    return crawl_pointers
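# For illustration: the qry dict handed to _email_crawl_pointers above follows
# the same Parse REST query shape used elsewhere in these snippets (a
# JSON-encoded 'where' clause plus a row limit). The domain value and the
# commented call are hypothetical.
import json

qry = {'where': json.dumps({'domain': 'example.com'}), 'limit': 1000}
# crawl_pointers = obj._email_crawl_pointers(qry)  # obj: an instance of the class above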
def test_unrollCondition():
    from parse import Parse
    rcContainer = Parse(os.path.join(ROOT, "data", "sample-fss-output-ipa")).parse()
    for k in rcContainer.getKeys():
        print "=" * 30
        print "k:", k
        unrolled = rcContainer.getUnrolledConditionsByKey(k)
        print "unrolled:", unrolled
    return
def main(options, args):
    """
    We run this when we run this script from the command line.
    >>> main(None, None)
    """
    parse = Parse('_input/test')
    parse.crime = 'violent'
    parse.grep = False
    parse.location = 'capitol-hill'
    result = parse.get_specific_crime()
    printcrimes = PrintCrimes(result, 'specific')
def _email_pattern_research():
    companies = Parse().get("Company", {"order": "-createdAt", "limit": 1000})
    for company in companies.json()["results"]:
        if "domain" in company.keys():
            domain = company["domain"]
            api_key = "9a31a1defcdc87a618e12970435fd44741d7b88794f7396cbec486b8"
            #name = request.args['name'] if "name" in request.args.keys() else ""
            name = ""
            q.enqueue(EmailGuess().search_sources, domain, name, api_key,
                      timeout=6000)
    return {"research": "started"}
def get_crime_item(self):
    """
    Return a Parse report.
    >>> report = Report(**{'date_type': 'test', 'location': 'capitol-hill', 'report_type': 'specific'})
    >>> output = report.get_crime_item()
    >>> print output['count'], output['crime']
    29 None
    """
    fn = self.build_filename()
    parse = Parse('_input/%s' % fn)
    parse.crime = self.crime
    parse.grep = self.grep
    try:
        parse.location = self.location
    except:
        pass
    try:
        parse.set_timespan(self.timespan)
    except:
        pass
    # *** eventually we might want to allow for other types of reports
    if self.report_type == 'rankings':
        result = parse.get_rankings()
    elif self.report_type == 'specific':
        result = parse.get_specific_crime()
    return result
class PCSerial(TaskThread):
    def __init__(self):
        TaskThread.__init__(self)
        self.buf = StringBuffer()
        self.p = Parse(self.buf)
        self.p.start()

    def read(self):
        #if len(self.buf.getvalue()) > 0:
        #    print "value \"%s\"\n" % self.buf.getvalue()
        return self.buf.read()

    def shutdown(self):
        self.p.shutdown()
        TaskThread.shutdown(self)
def test():
    BasicIO.init()
    led = Led(4)
    exitCondition = threading.Condition()

    def buttonEvent(button):
        if button.down():
            led.on()
            notify(exitCondition)
        else:
            led.off()

    button = Button(14, Button.LOW, buttonEvent)
    # wait(cond)
    sensor = 22
    pin = 15
    # r = requests.get('https://google.com')
    # print(r.text.encode('utf-8'))
    # while True:
    #     humidity, temperature = Adafruit_DHT.read_retry(sensor, pin)
    #     print("temperature=%0.1f, humidity=%2.1f" % (temperature, humidity))
    #     time.sleep(15)
    parse = Parse('BiPX6d3kDzfinO9Y8kHvNo1rnCPl8PoJW2zUXHYk',
                  'zNMGmfiYzq7W4eyD7mpSfixKIvxaZRPjCS38Qh6z')
    while True:
        humidity, temperature = Adafruit_DHT.read_retry(sensor, pin)
        print("temperature=%0.1f, humidity=%2.1f" % (temperature, humidity))
        resp = parse.upload('DHTMeter',
                            '{"temperature":%2.1f,"humidity":%2.1f}'
                            % (temperature, humidity))
        print("uploaded=%s" % resp)
        led.blink(3)
        # print(resp)
        time.sleep(60)
    led.blink()
class TestParser(unittest.TestCase):
    def setUp(self):
        self.parse = Parse('entrada.txt')

    def test_parse(self):
        self.parse.parse_file()
        path = os.getcwd() + '/server_1/5352b590-05ac-11e3-9923-c3e7d8408f3a'
        self.assertTrue(os.path.isfile(path))
        f = open(path, 'r')
        line = f.readlines()
        self.assertEqual(line[0], '177.126.180.83 - - [15/Aug/2013:13:54:38 -0300] \"GET /meme.jpg HTTP/1.1\" 200 2148 \"-\" \"userid=5352b590-05ac-11e3-9923-c3e7d8408f3a\"\n')
        self.assertEqual(line[2], '177.126.180.83 - - [15/Aug/2013:13:57:48 -0300] \"GET /lolcats.jpg HTTP/1.1\" 200 5143 \"-\" \"userid=5352b590-05ac-11e3-9923-c3e7d8408f3a\"\n')

    def test_parse_error(self):
        self.parse.parse_file()
        path = os.getcwd() + '/server_1/f85f124a-05cd-11e3-8a11-a8206608c529'
        self.assertTrue(os.path.isfile(path))
        f = open(path, 'r')
        line = f.readlines()
        self.assertNotEqual(line[0], '177.126.180.83 - - [15/Aug/2013:13:54:38 -0300] \"GET /meme.jpg HTTP/1.1\" 200 2148 \"-\" \"userid=5352b590-05ac-11e3-9923-c3e7d8408f3a\"\n')
def getParsedEvt(e):
    pe = ParsedEvt(False, False, "", long(-1), long(-1), long(-1), "")
    pe.isPollEvent = Parse.isPollEvent(e)
    pe.isSleepEvent = Parse.isSleepEvent(e)
    pe.receiver = Parse.getNode(e)
    if not pe.isPollEvent:
        pe.leader = Parse.getVoteField(e, "Leader = ", Parse.eventToLeaderMap)
        pe.zxid = Parse.getVoteField(e, "ZxID = ", Parse.eventToZxidMap)
        pe.epoch = Parse.getVoteField(e, "Epoch = ", Parse.eventToEpochMap)
        pe.queue = Parse.getQueue(e)
    return pe
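# A guess at the ParsedEvt container used above: the positional constructor
# call and the attribute assignments suggest a simple mutable record with
# these seven fields (Python 2, given the long() calls). The field names and
# their order are inferred from the snippet, not confirmed by it.
class ParsedEvt(object):
    def __init__(self, isPollEvent, isSleepEvent, receiver, leader, zxid,
                 epoch, queue):
        self.isPollEvent = isPollEvent
        self.isSleepEvent = isSleepEvent
        self.receiver = receiver
        self.leader = leader
        self.zxid = zxid
        self.epoch = epoch
        self.queue = queue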
def _daily_collect(self, profile_id):
    profile = Parse().get("ProspectProfile/" + profile_id, {"include": "profiles"})
    _signal = [i["press_id"] for i in profile.json()["profiles"]
               if i["className"] == "TwitterProfile"]
    d1, d2 = Helper()._timestamp()
    qry = {"signal": _signal[0], "timestamp": {"$gte": d1, "$lte": d2}}
    press = Parse().get("Tweet", {"limit": 1000, "skip": 0, "count": True,
                                  "where": json.dumps(qry),
                                  "order": "-timestamp"}).json()["results"]
    profile = profile.json()
    report = {"user": profile["user"], "user_company": profile["user_company"]}
    report["profile"] = Parse()._pointer("ProspectProfile", profile["objectId"])
    _report = Parse().create("SignalReport", report).json()["objectId"]
    _report = Parse()._pointer("SignalReport", _report)
    cos = pd.DataFrame(press)
    if cos.empty:
        return
    cos = cos[cos.company_name.notnull()].drop_duplicates("company_name")
    cos["report"] = [_report] * len(cos.index)
    Parse()._batch_df_create("PeopleSignal", cos)
    # TODO - Queue ProspectTitle Search if present
    q.enqueue(PeopleSignal()._check_for_people_signal, cos, profile, _report)
def __init__(self, moves):
    """
    Creates a game state based on the supplied move list

    moves: a string of a game state in standard notation
    """
    from square import Square
    self.squares = []
    for i in range(9):
        self.squares.append(Square(self, i + 1))
    from parse import Parse
    moves = Parse.moves(moves)
    self.moves = []
    for move in moves:
        # Play through supplied moves
        self.step(move)
def run(file, count, minsylls, maxsylls):
    """
    Actually runs the generator.
    Note that all parameters are supplied by the CLI through click.
    """
    # Perform error checking
    if minsylls > maxsylls:
        click.echo("ERROR: minsylls cannot be greater than maxsylls")
        sys.exit(2)
    elif minsylls < 1:
        click.echo("ERROR: minsylls must be at least 1")
        sys.exit(2)
    elif count < 1:
        click.echo("ERROR: count must be at least 1")
        sys.exit(2)
    global CATEGORIES, SYLLABLES
    parser = Parse(file)
    CATEGORIES = parser.categories
    SYLLABLES = parser.syllables
    print_words(count, minsylls, maxsylls)
def main():
    if sys.argv[1][-3:] != "asm":
        print("Error: wrong file type for input, use \".asm\" file!")
        sys.exit()
    inputfile = sys.argv[1]
    #inputfile = "Max.asm"
    outputfile = inputfile[:-3] + "hack"
    #outputfile = "Max.hack"
    par = Parse(inputfile)
    code = CodeTrans()
    symbols = SymbolTable()
    symbol_search(par, symbols)
    variable_search(par, symbols)
    binary_list = assembler(par, symbols, code)
    with open(outputfile, "w") as of:
        for bcode in binary_list:
            of.write(bcode + "\n")
def pinmuxgen(pth=None, verify=True):
    """ populating the file with the code """
    p = Parse(pth, verify)
    iocells = Interfaces()
    iocells.ifaceadd('io', p.N_IO, io_interface, 0)
    ifaces = Interfaces(pth)
    #ifaces.ifaceadd('io', p.N_IO, io_interface, 0)
    init(p, ifaces)
    bp = 'bsv_src'
    if pth:
        bp = os.path.join(pth, bp)
    if not os.path.exists(bp):
        os.makedirs(bp)
    bl = os.path.join(bp, 'bsv_lib')
    if not os.path.exists(bl):
        os.makedirs(bl)
    cwd = os.path.split(__file__)[0]
    # copy over template and library files
    shutil.copyfile(os.path.join(cwd, 'Makefile.template'),
                    os.path.join(bp, 'Makefile'))
    cwd = os.path.join(cwd, 'bsv_lib')
    for fname in ['AXI4_Lite_Types.bsv', 'Semi_FIFOF.bsv']:
        shutil.copyfile(os.path.join(cwd, fname), os.path.join(bl, fname))
    bus = os.path.join(bp, 'busenable.bsv')
    pmp = os.path.join(bp, 'pinmux.bsv')
    ptp = os.path.join(bp, 'PinTop.bsv')
    bvp = os.path.join(bp, 'bus.bsv')
    write_pmp(pmp, p, ifaces, iocells)
    write_ptp(ptp, p, ifaces)
    write_bvp(bvp, p, ifaces)
    write_bus(bus, p, ifaces)
def load_subjects(soup, poem):
    """loads subjects from poem meta tags"""
    poem_id = poem.poem_id
    subjects = Parse.parse_subjects(soup)
    if subjects:
        for subject in subjects:
            try:
                subject_id = Subject.query.filter(
                    Subject.subject_name == subject).one().subject_id
            except NoResultFound:
                log_err('subject', f, subject)
                s = Subject(subject_name=subject)
                db.session.add(s)
                db.session.flush()
                subject_id = s.subject_id
            poemsubject = PoemSubject(poem_id=poem_id, subject_id=subject_id)
            db.session.add(poemsubject)
            db.session.flush()
from download import Download
from parse import Parse
from item import Item
import xml.etree.ElementTree as ET

url = 'http://showrss.info/feeds/1166.rss'
#path = '1166.rss'
src = Download.read_content('test.xml')
#Download.save_file(url, 'test2.xml')
#print(src)
l = Parse.parse_content(src)
for x in l:
    print(x)
#root = ET.fromstring(src)
#print([x for x in [child for child in root.iter('item')]])
import os
import sys
import threading

# Application imports
sys.path.append("../modules")
from config import Config
from modem import Modem
from logger import log
from queue import Queue
from parse import Parse

# Global variables
QDIR = "moqueue"

# Instantiate objects
App = Modem()
Parser = Parse()
Q = Queue(QDIR)

# Callback is fired when an SBD message comes in.
def _callback(data):
    print("Callback: %s" % data)
    message_response = Config.get("respond")["response"]
    string_to_match = Config.get("respond")["match"]
    if string_to_match in data:
        Q.add(message_response)

old = os.stat(QDIR).st_mtime
log.debug("Old time: %s" % repr(old))
            # print answer
            # print "^^^"
            return answer
    if node.label() == "VP":
        for sub_node in node:
            if (sub_node.label() == "PP" or sub_node.label() == "SBAR") and \
                    " ".join(sub_node.leaves()).lower() not in question.lower():
                answer = " ".join(sub_node.leaves()) + "."
                answer = answer[0].upper() + answer[1:]
                # print answer
                return answer
    return relevant


WH = WH()
P = Parse()
# test_q = 'when did Clint Dempsey score against Ghana 29 seconds into the group play match ?'
# test_relevant = 'On June 16, Clint Dempsey scored against Ghana 29 seconds into the group play match.'
# test = When_answer()
# print test_q
# print test_relevant
# test.get_answer(test_q, test_relevant)
# test2 = Where_answer()
# test_q2 = "where did he obtain his master degree"
# test_relevant2 = "In England he obtained his master degree."
# test_relevant3 = "the picture is above the wall"
# test_q3 = "where is the picture?"
# print test_q2
# print test_relevant2
# test2.get_answer(test_q2, test_relevant2)
def main(): print("\nEnter code to parse:") string = input() ast = Parse.build_ast(Parse.prepare(string)) print("\nAbstract syntax tree:", ast, sep="\n") print("\nResult:", interpret(ast))
"$": Value("builtin", lambda x, y, env: env_lookup(env, y.value[1:])), "->": Value("special", fn_), "=>": Value("special", fn_early), "fn": Value("builtin", fn), "?=": Value("builtin", lambda x, y, env: match_(x, y, env)), ":=": Value("builtin", lambda x, y, env: assign_(x, y, env)), "::=": Value("builtin", lambda x, y, env: assign_(x, y, env[0])), "ENV": Value("builtin", lambda x, y, env: Value("env", env[1])), # help "pr": Value("builtin", print_), }) ENV = (ENV0, {}) # test fns ENV[1]["foo"] = Value("fn", Fn(ENV, "a", "b", Parse(r"a+b+b"))) ENV[1]["map_"] = Value("fn", Fn(ENV, "x", "y", Parse(r"\f; \col ?= y | \a(col)\b ?= x ? (a map_(f; col)) col (b map_(f; col)) :| x f()"))) ENV[1]["map"] = Value("fn", Fn(ENV, "x", "f", Parse(r"x is_cons() ? x map_(f; x color.)"))) def env_lookup(env, key): while True: parent, env_dict = env if key in env_dict: return env_dict[key] if not parent: raise Exception(f"Can't env lookup: {key}") env = parent
def __init__(self, file_name):
    self.code = Code()
    self.parser = Parse(file_name)
from nltk.tree import Tree as Tree
from parse import Parse
from pattern.en import conjugate
from pattern.en import tenses

sNLP = Parse()
BE_VB_LIST = [
    "is", "was", "are", "am", "were", "will", "would", "could", "might",
    "may", "should", "can"
]
DO_DID_DOES = ["do", "did", "does"]
VB_LIST = ["VBZ", "VBP", "VBD"]


class Binary:
    def convert(self, text, tree):
        parse_by_structure = []
        NEG = 0
        NP = 0
        VP = 0
        for t in tree[0]:
            if t.label() == "VP":
                VP = 1
            if t.label() == "NP":
                NP = 1
            if t.label() != "VP":
                parse_by_structure += (t.leaves())
            else:
                for tt in t:
                    if tt.label() != "RB":
def testSingleNote(self):
    sheet = open('data/single_note.txt', 'r')
    parse = Parse(sheet)
    parse.printStaff()
    sheet.close()
def testEmpty(self):
    sheet = open('data/empty.txt', 'r')
    parse = Parse(sheet)
    parse.printStaff()
    sheet.close()
def testBarOverlap(self):
    sheet = open('data/bar_overlap.txt', 'r')
    parse = Parse(sheet)
    parse.printStaff()
    sheet.close()
def testMultipleNotes(self):
    sheet = open('data/multiple_notes.txt', 'r')
    parse = Parse(sheet)
    parse.printStaff()
    sheet.close()
def write_vm1(inputfile, in_filename, ctabel, cw):
    par = Parse(inputfile, ctabel)
    cw.setInputname(in_filename)
    while par.hasMoreCommands():
        par.advance()
        ctype = par.cmdType()
        if ctype == "C_ARITHMETIC":
            cw.writeArithmetic(par.arg1())
        elif ctype == "C_PUSH" or ctype == "C_POP":
            cw.writePushPop(ctype, par.arg1(), int(par.arg2()))
        elif ctype == "C_LABEL":
            cw.writeLabel(par.arg1())
        elif ctype == "C_IF":
            cw.writeIf(par.arg1())
        elif ctype == "C_GOTO":
            cw.writeGoto(par.arg1())
        elif ctype == "C_FUNCTION":
            cw.writeFunction(par.arg1(), int(par.arg2()))
        elif ctype == "C_RETURN":
            cw.writeReturn()
        elif ctype == "C_CALL":
            cw.writeCall(par.arg1(), int(par.arg2()))
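# A hypothetical driver for write_vm1 above, in the usual VM-translator shape.
# The CodeWriter name, its constructor, and close() are assumptions inferred
# from the cw.* calls in the function; only Parse(inputfile, ctabel) is given,
# and it is unclear from the snippet whether inputfile is a path or an open file.
import sys

def translate(vm_path, ctabel):
    cw = CodeWriter(vm_path.replace(".vm", ".asm"))  # assumed writer over the output file
    write_vm1(vm_path, vm_path, ctabel, cw)          # passing the path for both arguments
    cw.close()                                       # assumed to flush and close the output

if __name__ == "__main__":
    translate(sys.argv[1], ctabel={})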
from parse import Parse

# Source page
url = "http://yandex.ru"
# Get first-level links
links = Parse.get_content(None, url)
for link in links:
    print("=================================")
    print("- " + link[1] + " - " + link[0])
    # Get second-level links
    l2 = Parse.get_content(None, link[0])
    for lnk in l2:
        print("--- " + lnk[1] + " - " + lnk[0])
def testHarmony(self):
    sheet = open('data/harmony.txt', 'r')
    parse = Parse(sheet)
    parse.printStaff()
    sheet.close()
def on_modified(self, event):
    self.parse = Parse()
    if event.src_path.endswith(".log"):
        self.parse.set_file_path(event.src_path)
        self.parse.parse_file()
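# A minimal sketch of how a handler with this on_modified might be registered,
# assuming it subclasses watchdog's FileSystemEventHandler. The LogHandler name
# and the watched directory are hypothetical; only the on_modified body above
# comes from the snippet.
import time
from watchdog.observers import Observer

observer = Observer()
observer.schedule(LogHandler(), "/var/log", recursive=False)  # LogHandler: the class above
observer.start()
try:
    while True:
        time.sleep(1)
finally:
    observer.stop()
    observer.join()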
from TermAtaTimeR import TRetrival
from Compressed import CompressedIndex
from parse import Parse

p = Parse()
t = TRetrival()
Qarray = [
    "the king queen royalty", "servant guard soldier", "hope dream sleep",
    "ghost spirit", "fool jester player", "to be or not to be", "alas",
    "alas poor", "alas poor yorick", "antony strumpet"
]
trecrun = "shivangising-vectorspace"
file = open("vecctorspace.trecrun", "w")
rank = 1
for i, q in enumerate(Qarray, 1):
    Q = q.split(" ")
    print("Doing the BM25 Retrieval for ", q)
    print()
    x = t.TermAtATime(Q, 1000, "BM25")
    for element in x:
        QueryNumber = "Q" + str(i)
        scene = p.getScene(element[0])
        score = element[1]
        file.write(QueryNumber + " " + "skip" + " " + scene + " " + str(rank)
                   + " " + str(score) + " " + trecrun + "\n")
        rank += 1
def testLyrics(self):
    sheet = open('data/lyrics.txt', 'r')
    parse = Parse(sheet)
    parse.printStaff()
    sheet.close()
        binary_q = binary_q[0].lower() + binary_q[1:]
        si_binary = binary_q
        if si_binary:
            return "where " + si_binary
        else:
            return None
        return None

    def main(self, binary, si, NE):
        # binary is string rep of binary question
        if binary:
            binary_t = Tree.fromstring(Parse.parse(binary))
            when = self.when(binary_t)
            where = self.where(si)
            why = Why.main(si)
            what_who = What_Who.main(si, NE)
            if when:
                print(" *** when : ", str(when))
            if where:
                print(" *** wher : ", str(where))
            if why:
                print(" *** why : ", str(why))
            if what_who:
                print(" *** www : ", str(what_who))


Parse = Parse()
Binary = Binary()
Why = Why()
What_Who = What_Who()
def fetch_stocks(self, params):
    filter = Filter()
    parser = Parse()
    url = filter.build_query_string(params)
    results = parser.parse(url, [])
def _old_start(self):
    print "started"
    cp = Parse()._bulk_get("CompanyProspect")
    p = Parse()._bulk_get("Prospect")
    uc = Parse()._bulk_get("UserContact")
    cl = Parse().get("ContactList", {"limit": 1000}).json()["results"]
    print cl
    cl = pd.DataFrame(cl)
    print cl.head()
    cl["user_id"] = [i["objectId"] for i in cl.user]
    for count, i in enumerate(cp):
        if "company" in i.keys():
            if "domain" in i["company"].keys():
                cp[count]["domain"] = i["company"]["domain"]
    for count, i in enumerate(p):
        if "company" in i.keys():
            if "domain" in i["company"].keys():
                p[count]["domain"] = i["company"]["domain"]
    for count, i in enumerate(uc):
        if "company" in i.keys():
            if "name" in i["company"].keys():
                uc[count]["company_name"] = i["company"]["name"]
            else:
                uc[count]["company_name"] = ""
        else:
            uc[count]["company_name"] = ""
    # Adding Lists To Contacts / Prospects
    for count, i in enumerate(cp):
        if "user" not in i.keys():
            continue
        user_id = i["user"]["objectId"]
        _cl = cl[(cl.user_id == user_id) & (cl.db_type == "all_company_prospect")]
        al = cl[(cl.user_id == user_id) & (cl.db_type == "all_feed_prospect")]
        _cl, al = _cl.to_dict('r'), al.to_dict('r')
        all_feed_id = al[0]["objectId"] if al else ""
        list_id = _cl[0]["objectId"] if _cl else ""
        if "lists" in i.keys():
            cp[count]["lists"] = cp[count]["lists"] + [{"objectId": list_id},
                                                       {"objectId": all_feed_id}]
        else:
            cp[count]["lists"] = [{"objectId": list_id}, {"objectId": all_feed_id}]
    for count, i in enumerate(p):
        if "user" not in i.keys():
            continue
        user_id = i["user"]["objectId"]
        _cl = cl[(cl.user_id == user_id) & (cl.db_type == "all_prospect")]
        al = cl[(cl.user_id == user_id) & (cl.db_type == "all_feed_prospect")]
        _cl, al = _cl.to_dict('r'), al.to_dict('r')
        all_feed_id = al[0]["objectId"] if al else ""
        list_id = _cl[0]["objectId"] if _cl else ""
        if "lists" in i.keys():
            p[count]["lists"] = p[count]["lists"] + [{"objectId": list_id},
                                                     {"objectId": all_feed_id}]
        else:
            p[count]["lists"] = [{"objectId": list_id}, {"objectId": all_feed_id}]
    for count, i in enumerate(uc):
        if "user" not in i.keys():
            continue
        db_type, user_id = i["db_type"], i["user"]["objectId"]
        _cl = cl[(cl.user_id == user_id) & (cl.db_type == db_type)]
        al = cl[(cl.user_id == user_id) & (cl.db_type == "all_feed_prospect")]
        _cl, al = _cl.to_dict('r'), al.to_dict('r')
        all_feed_id = al[0]["objectId"] if al else ""
        list_id = _cl[0]["objectId"] if _cl else ""
        if "lists" in i.keys():
            uc[count]["lists"] = uc[count]["lists"] + [{"objectId": list_id},
                                                       {"objectId": all_feed_id}]
        else:
            uc[count]["lists"] = [{"objectId": list_id}, {"objectId": all_feed_id}]
    _p, _cp, _uc = pd.DataFrame(p), pd.DataFrame(cp), pd.DataFrame(uc)
    #print _p[_p.domain.isnull()].shape, _p.shape
    #print _cp[_cp.domain.isnull()].shape, _cp.shape
    # for user pointer add user_contact_list pointer
    print _p.shape, _cp.shape, _uc.shape
    i, j, tmp = 0, 0, pd.concat([_cp, _p, _uc]).reset_index()
    print tmp.domain.drop_duplicates().shape
    #return
    for a, b in tmp[["domain", "lists", "company_name", "user"]].groupby("domain"):
        if a == ".":
            continue
        i = i + 1
        if b.lists.dropna().sum():
            j = j + 1
            lists = [ii["objectId"] for ii in b.lists.dropna().sum()
                     if "objectId" in ii.keys()]
            lists = pd.Series(lists).unique().tolist()
            company_name, domain = b.company_name.tolist()[0], a
            #print lists, a, b.company_name.tolist()[0]
            '''
            r = requests.post("https://clear-spark.herokuapp.com/v1/clearspark/daily_news",
            #r = requests.post("http://localhost:4000/v1/clearspark/daily_news",
                              headers={'Content-type': 'application/json'},
                              data=json.dumps({"company_name": company_name, "domain": domain,
                                               "lists": lists, "source": "blog"}))
            print r.text
            '''
            api_key = "9a31a1defcdc87a618e12970435fd44741d7b88794f7396cbec486b8"
            #if i > 2: break
            x = 600000
            #job = q.enqueue(Companies()._news, domain, api_key, company_name, timeout=x)
            company_name = self.remove_non_ascii(company_name)
            domain = self.remove_non_ascii(domain)
            print j, company_name, domain  #, lists, tmp.shape
            job = q.enqueue(Companies()._daily_secondary_research,
                            company_name, domain, api_key, lists, timeout=60000)
            '''
            job = q.enqueue(Companies()._recent_webpages_published, domain,
                            api_key, company_name, timeout=60000)
            #time.sleep(0.5)
            #print lists
            job.meta["lists"] = lists
            job.meta["_lists"] = lists
            job.save()
            #RQueue()._meta(job, "lists", lists)
            '''
class MainForm:
    def __init__(self):
        self.root = Tk()
        self.root.title("Plot")
        self.root.attributes("-zoomed", True)
        self.title = "Calculus"
        #################################################################
        # User config area
        #################################################################
        # Global config
        self.getVectorMethod = "Gaussian"
        self.getMcLaurinSeriesMethod = "Analytically"
        # Justify the plot
        self.left = -50
        self.right = 50
        self.seq = 1000
        self.thresshole = 500
        #################################################################
        # End user config area
        #################################################################
        # Plot function
        self.parse = None
        self.option = "root"
        self.inputChanged = False
        # Get root information
        self.root.update()
        self.width = self.root.winfo_width()
        self.height = self.root.winfo_height()
        self.__DPI = 110.0  # Average DPI for most monitors
        self.root.geometry("{}x{}".format(self.width, self.height))
        # Matrix
        self.matrix = Matrix()
        self.saveSetPoint = False
        # Loading GUI
        self.GUI()
        # Event Handler
        self.state = False
        self.root.bind("<F11>", self.toggle_fullscreen)
        self.root.bind("<Escape>", self.end_fullscreen)

    def graph(self):
        self.parse_func()
        # Root
        f = open("calculus.dot", 'w+')
        f.write("graph " + self.title + " {\n")
        f.write('\tnode [ fontname = "Arial"]\n')
        self.parse.root.toGraph(-1, f)
        f.write('}')
        f.close()
        G = AGraph("calculus.dot")
        G.draw("calculus.png", prog='dot')
        # Derivative analytically
        f = open("derivative.dot", 'w+')
        f.write("graph derivative {\n")
        f.write('\tnode [ fontname = "Arial"]\n')
        if (self.parse.derivative != None):
            self.parse.derivative.toGraph(-1, f)
        f.write('}')
        f.close()
        G = AGraph("derivative.dot")
        G.draw("derivative.png", prog='dot')
        messagebox.showinfo("Info", "Success")

    def plot(self):
        self.parse_func()
        # Plot
        self.a.set_title(self.title, fontsize=16)
        if (self.option == "root"):
            t = numpy.linspace(
                self.left, self.right,
                (numpy.abs(self.right) + numpy.abs(self.left)) * self.seq + 1)
            self.a.plot(t, self.function(t, self.parse.function, h=0.0000000001),
                        color='red')
            title = self.parse.str_function
        elif (self.option == "da"):
            t = numpy.linspace(
                self.left, self.right,
                (numpy.abs(self.right) + numpy.abs(self.left)) * self.seq + 1)
            self.a.plot(t, self.function(t, self.parse.function_da, h=0.0000000001),
                        color='blue')
            title = self.parse.str_function_da
        elif (self.option == "dq"):
            t = numpy.linspace(
                self.left, self.right,
                (numpy.abs(self.right) + numpy.abs(self.left)) * self.seq + 1)
            self.a.plot(t, self.function(t, self.parse.function_dq, h=0.0000000001),
                        color='orange')
            title = self.parse.str_function_dq
        elif (self.option == "fill"):
            t = numpy.linspace(
                int(self.left_bound.get()), int(self.right_bound.get()),
                (numpy.abs(int(self.left_bound.get())) +
                 numpy.abs(int(self.right_bound.get()))) * self.seq + 1)
            self.a.fill_between(t, 0, self.function(t, self.parse.function),
                                color="green")
            title = "Area: {}".format(self.lbl_riemann)
        elif (self.option == "McLaurin"):
            vector = self.getMcLaurinVector(
                0, depth=int(self.depth_input.get()),
                method=self.getMcLaurinSeriesMethod)
            mclaurrin = "{}".format(vector[0])
            for i in range(1, len(vector)):
                mclaurrin += "+{}*x^{}".format(vector[i], i)
            t = numpy.linspace(
                self.left, self.right,
                (numpy.abs(self.right) + numpy.abs(self.left)) * self.seq + 1)
            y = numpy.zeros(len(t))  # allocate y with float elements
            for i in range(len(t)):
                try:
                    y[i] = vector[0]
                    for j in range(1, len(vector)):
                        y[i] += vector[j] * t[i]**j
                except Exception as ex:
                    y[i] = numpy.NAN
            self.a.plot(t, y, color='yellow')
            title = mclaurrin
        self.a.set_title(title, fontsize=8)
        self.canvas.draw()

    def parse_func(self):
        if (self.inputChanged):
            try:
                print(self.input.get())
                self.parse = Parse(self.input.get().replace(" ", ""))
            except Exception as e:
                messagebox.showerror(
                    "Error",
                    "Something's gone wrong, please check your input again")
                print(e)
                return
            self.lbl_function.set(self.parse.str_function)
            self.lbl_derivative.set(self.parse.str_function_da)
            self.lbl_derivative_dq.set(self.parse.str_function_dq)
            try:
                self.lbl_riemann.set(
                    self.parse.getRiemannIntegrals(
                        float(self.left_bound.get()),
                        float(self.right_bound.get())))
            except Exception as e:
                messagebox.showerror(
                    "Error",
                    "Something's gone wrong, please check your input again")
                print(e)
                return
            self.inputChanged = False

    def function(self, t, str_func, h=0.0000000001):
        y = numpy.zeros(len(t))  # allocate y with float elements
        for i in range(len(t)):
            x = t[i]
            try:
                y[i] = eval(str_func)
                if (abs(y[i]) > self.thresshole):
                    y[i] = numpy.NAN
            except Exception as ex:
                print(ex)
                y[i] = numpy.NAN
        return y

    def clearPlot(self):
        self.a.clear()
        self.a.grid(True)
        self.a.set_title("Calculus", fontsize=16)
        self.canvas.draw()

    def setPlotFunction(self, option="linear", func="0"):
        self.option = option

    def getMcLaurinSeries(self, x, x0, depth=1, h=0.000001):
        S = eval(self.parse.function)
        for i in range(1, depth):
            S += (self.parse.getDifferenceQuotion(x, depth=i, h=h) *
                  ((x0 - x)**i)) / numpy.math.factorial(i)
        return S

    def getMcLaurinVector(self, x, depth=1, method="Newton"):
        depth += 1
        if (method == "Newton"):
            vector = []
            try:
                vector.append(eval(self.parse.function))
            except Exception as ex:
                messagebox.showerror(
                    "Error!", "Invalid function. Please check your input again!")
                print(ex)
                return []
            for i in range(1, depth):
                try:
                    vector.append(
                        self.parse.getDifferenceQuotion(x, depth=i) /
                        numpy.math.factorial(i))
                except Exception as ex:
                    messagebox.showerror(
                        "Error!",
                        "Invalid derivative at depth {}. Please try another function!"
                        .format(i))
                    print(ex)
                    return []
            return vector
        elif (method == "Analytically"):
            vector = []
            derivative = self.parse.root
            try:
                vector.append(eval(derivative.toFunction()))
            except Exception as ex:
                messagebox.showerror(
                    "Error!", "Invalid function. Please check your input again!")
                print(ex)
                return []
            for i in range(1, depth):
                derivative = derivative.getDerivative()
                derivative = derivative.simplify(
                ) if derivative != None else notation.Number(data="0")
                try:
                    vector.append(
                        eval(derivative.toFunction()) / numpy.math.factorial(i))
                except Exception as ex:
                    messagebox.showerror(
                        "Error!",
                        "Invalid derivative at depth {}. Please try another function!"
                        .format(i))
                    print(ex)
                    return []
            return vector
        return []

    def setRecordSetpointMode(self, option=False):
        self.saveSetPoint = option
        if (option == False):
            if (self.matrix.setPointLen == 0):
                messagebox.showerror("Error", "Set point is empty")
                return
            # Plot
            v = self.matrix.getVector(method=self.getVectorMethod)
            if (v == []):
                messagebox.showerror("Error", "Your setpoints are not continuous")
                self.matrix.refresh()
                return
            t = numpy.linspace(
                self.left, self.right,
                (numpy.abs(self.right) + numpy.abs(self.left)) * self.seq + 1)
            y = numpy.zeros(len(t))  # allocate y with float elements
            for i in range(len(t)):
                for j in range(len(v)):
                    y[i] += v[j] * (t[i]**j)
            self.lbl_recordSetpoint.set("Setpoint record turned off")
            poly = "{:.4f}".format(round(v[0], 4))
            if (self.matrix.setPointLen >= 2):
                poly += " + {:.4f}*x".format(round(v[1], 4))
            for i in range(2, self.matrix.setPointLen):
                poly += " + {:.4f}*x^{}".format(round(v[i], 4), i)
                if (i % 4 == 0):
                    poly += "\n"
            self.a.set_title(poly, fontsize=8)
            self.matrix.refresh()
            self.a.plot(t, y, color='blue')
            self.canvas.draw()
        else:
            self.lbl_recordSetpoint.set("Setpoint record turned on")

    def showSetPoint(self):
        points = self.matrix.setPoint
        message = "x\ty\n"
        for tmpMsg in points:
            message += "{:.4f}\t{:.4f}\n".format(tmpMsg[0], tmpMsg[1])
        messagebox.showinfo("Setpoint", message)

    def showPicture(self, imgDir=None):
        self.parse_func()
        if (imgDir != None):
            img = mpimg.imread(imgDir)
        else:
            url = "https://www.graphsketch.com/render.php?\
eqn1_color=1&\
eqn1_eqn={}&\
x_min={}&x_max={}&\
y_min={}&y_max={}&\
x_tick=1&y_tick=1&\
x_label_freq=5&\
y_label_freq=5&\
do_grid=0&\
do_grid=1&\
bold_labeled_lines=0&\
bold_labeled_lines=1&\
line_width=4&\
image_w=850&\
image_h=525".format(self.parse.str_function, self.left, self.right,
                    self.left, self.right).replace(" ", "")
            try:
                urlretrieve(url, filename="tmp.png")
                img = mpimg.imread('tmp.png')
                os.remove("tmp.png")
            except Exception as e:
                messagebox.showerror(
                    "Error", "Something's gone wrong, please try again later")
                print(e)
                return
            open_new_tab(url.replace("render.php?", "?"))
        plt.imshow(img)
        plt.show()

    def GUI(self):
        # ======================================================================
        # Top Frame
        # ======================================================================
        self.bottomFrame = Frame(self.root, width=self.width, bd=2, relief="raised")
        self.bottomFrame.pack(side=TOP, fill=BOTH, expand=True)
        """ Plotting """
        # Figure
        fig = Figure(figsize=(self.width / self.__DPI,
                              self.height / self.__DPI - 1))
        self.a = fig.add_subplot(111)
        self.a.set_title("Calculus", fontsize=16)
        self.a.set_ylabel("Y", fontsize=14)
        self.a.set_xlabel("X", fontsize=14)
        self.a.axhline(linewidth=1, color='black')
        self.a.axvline(linewidth=1, color='black')
        self.a.plot([], [], color='red')
        self.a.grid(True)
        self.canvas = FigureCanvasTkAgg(fig, master=self.bottomFrame)
        self.canvas.draw()
        self.canvas.get_tk_widget().pack(pady=5)
        # Toolbar
        self.toolbar = NavigationToolbar2Tk(self.canvas, self.bottomFrame)
        self.toolbar.update()
        self.canvas.get_tk_widget().pack()
        # ======================================================================
        # Bottom Frame
        # ======================================================================
        self.topFrame = Frame(self.root, width=self.width, bd=2, relief="raised")
        self.topFrame.pack(side=TOP, fill=BOTH, expand=True)
        """ Top Left """
        self.topFrameLeft = Frame(self.topFrame, width=self.width / 2)
        self.topFrameLeft.pack(side=LEFT, expand=True)
        ### Left
        self.frameLeft_Lpanel = Frame(self.topFrameLeft)
        self.frameLeft_Lpanel.pack(side=LEFT, expand=True)
        self.frameLeft_Lpanel.grid_propagate(1)
        # Labels
        self.lbl_function = StringVar()
        self.lbl_function.set("None")
        self.lbl_derivative = StringVar()
        self.lbl_derivative.set("None")
        self.lbl_derivative_dq = StringVar()
        self.lbl_derivative_dq.set("None")
        self.lbl_riemann = StringVar()
        self.lbl_riemann.set("None")
        Label(self.frameLeft_Lpanel, text="Input").grid(row=0, column=0,
                                                        sticky=W, padx=2)
        Label(self.frameLeft_Lpanel, text="Function:").grid(row=1, column=0,
                                                            sticky=W, padx=2)
        Label(self.frameLeft_Lpanel, textvariable=self.lbl_function,
              width=60).grid(row=1, column=1, columnspan=2, sticky=W, padx=2)
        Label(self.frameLeft_Lpanel, text="Derivative:").grid(row=2, column=0,
                                                              sticky=W, padx=2)
        Label(self.frameLeft_Lpanel, textvariable=self.lbl_derivative,
              width=60).grid(row=2, column=1, columnspan=2, sticky=W, padx=2)
        Label(self.frameLeft_Lpanel, text="Difference quotient:").grid(
            row=3, column=0, sticky=W, padx=2)
        Label(self.frameLeft_Lpanel, textvariable=self.lbl_derivative_dq,
              width=60).grid(row=3, column=1, columnspan=2, sticky=W, padx=2)
        Label(self.frameLeft_Lpanel, text="Riemann integral:").grid(
            row=4, column=0, sticky=W, padx=2)
        Label(self.frameLeft_Lpanel, textvariable=self.lbl_riemann,
              width=60).grid(row=4, column=1, columnspan=2, sticky=W, padx=2)
        # Input field
        self.input = Entry(self.frameLeft_Lpanel, width=30)
        self.input.grid(row=0, column=1, sticky=W, padx=2)
        self.input.bind("<Button-1>", self.input_changed)
        # Button
        Button(self.frameLeft_Lpanel, text="Parse",
               command=self.parse_func).grid(row=0, column=2, sticky=W, padx=2)
        ### Right
        self.frameLeft_Rpanel = Frame(self.topFrameLeft)
        self.frameLeft_Rpanel.pack(side=LEFT, expand=True)
        # Labels
        self.lbl_recordSetpoint = StringVar()
        self.lbl_recordSetpoint.set("Off")
        Label(self.frameLeft_Rpanel, text="Polynomial").grid(
            row=0, column=0, columnspan=1, sticky=W, padx=2)
        Label(self.frameLeft_Rpanel, textvariable=self.lbl_recordSetpoint).grid(
            row=0, column=1, columnspan=2, sticky=W, padx=2)
        self.lbl_polynomial = StringVar()
        self.lbl_polynomial.set("")
        Label(self.frameLeft_Rpanel, textvariable=self.lbl_polynomial).grid(
            row=2, column=0, columnspan=3, sticky=W, padx=2)
        # Buttons
        Button(self.frameLeft_Rpanel, text="Show setpoint",
               command=self.showSetPoint).grid(row=1, column=0, padx=2)
        Button(self.frameLeft_Rpanel, text="Record setpoint",
               command=lambda: self.setRecordSetpointMode(option=True)).grid(
                   row=1, column=1, padx=2)
        Button(self.frameLeft_Rpanel, text="Get polynomial",
               command=lambda: self.setRecordSetpointMode(option=False)).grid(
                   row=1, column=2, padx=2)
        """ Top Right """
        self.topFrameRight = Frame(self.topFrame, width=self.width / 2)
        self.topFrameRight.pack(side=LEFT, expand=True)
        ### Right
        self.frameRightOption = Frame(self.topFrameRight)
        self.frameRightOption.pack(side=LEFT, expand=True)
        # Button
        Button(self.frameRightOption, text="Plot", command=self.plot).grid(
            row=0, column=0, columnspan=1, rowspan=5, padx=2)
        # Inputs
        self.left_bound = Entry(self.frameRightOption, width=5)
        self.left_bound.insert(0, self.left)
        self.left_bound.grid(row=3, column=2, sticky=W, padx=2)
        self.right_bound = Entry(self.frameRightOption, width=5)
        self.right_bound.insert(0, self.right)
        self.right_bound.grid(row=3, column=3, sticky=W, padx=2)
        self.a_input = Entry(self.frameRightOption, width=5)
        self.a_input.insert(0, "0")
        self.a_input.grid(row=4, column=2, sticky=W, padx=2)
        self.depth_input = Entry(self.frameRightOption, width=5)
        self.depth_input.insert(0, "8")
        self.depth_input.grid(row=4, column=3, sticky=W, padx=2)
        # Radio buttons
        v = IntVar()
        Radiobutton(self.frameRightOption, text="Parsed function", padx=2, pady=2,
                    command=lambda: self.setPlotFunction(option="root"),
                    variable=v, value=0).grid(row=0, column=1, padx=2, sticky=W)
        Radiobutton(self.frameRightOption, text="Function derivative", padx=2, pady=2,
                    command=lambda: self.setPlotFunction(option="da"),
                    variable=v, value=1).grid(row=1, column=1, padx=2, sticky=W)
        Radiobutton(self.frameRightOption, text="Function difference quotient",
                    padx=2, pady=2,
                    command=lambda: self.setPlotFunction(option="dq"),
                    variable=v, value=2).grid(row=2, column=1, padx=2, sticky=W)
        Radiobutton(self.frameRightOption, text="Riemann integrals", padx=2, pady=2,
                    command=lambda: self.setPlotFunction(option="fill"),
                    variable=v, value=3).grid(row=3, column=1, padx=2, sticky=W)
        Radiobutton(self.frameRightOption, text="Mc Laurin series", padx=2, pady=2,
                    command=lambda: self.setPlotFunction(option="McLaurin"),
                    variable=v, value=4).grid(row=4, column=1, padx=2, sticky=W)
        ### Left
        self.frameRightButton = Frame(self.topFrameRight)
        self.frameRightButton.pack(side=LEFT, expand=True, padx=50)
        # Buttons
        Button(self.frameRightButton, text="Export Graph", command=self.graph,
               width=12).grid(row=0, column=0, padx=2, sticky=E)
        Button(self.frameRightButton, text="Clean Canvas", command=self.clearPlot,
               width=12).grid(row=1, column=0, padx=2, sticky=E)
        Button(self.frameRightButton, text="Validate", command=self.showPicture,
               width=12).grid(row=2, column=0, padx=2, sticky=E)

    #region Event
    def input_changed(self, event):
        self.inputChanged = True

    def canvas_on_key_hover(self, event):
        key_press_handler(event, self.canvas, self.toolbar)

    def canvas_on_click(self, event):
        if (self.saveSetPoint == True):
            self.matrix.addSetPoint(float(event.xdata), float(event.ydata))
            self.a.plot(event.xdata, event.ydata, 'rs', color="black")
            self.canvas.draw()

    def toggle_fullscreen(self, event=None):
        self.state = not self.state  # Just toggling the boolean
        self.root.attributes("-fullscreen", self.state)
        return "break"

    def end_fullscreen(self, event=None):
        self.state = False
        self.root.attributes("-fullscreen", False)
        return "break"
def main(argv=[]):
    # If there are no arguments, use empty.txt
    if len(sys.argv) <= 1:
        sheet = open('data/empty.txt', 'r')
    else:
        # Else, try opening the file and using it. If failed, use empty.txt
        try:
            sheet = open(sys.argv[1], 'r')
            parse = Parse(sheet)
        except:
            print("Corrupted file error. Starting from an empty file.")
            sheet = open('data/empty.txt', 'r')
    parse = Parse(sheet)
    selection = 0
    # List the different choices for the user
    while selection != 6:
        print("What would you like to do with the sheet music?\n")
        print("1. Modify a note")
        print("2. Add a harmony note")
        print("3. Edit song info")
        print("4. Modify lyrics")
        print("5. Save to file")
        print("6. Exit Sheet Music Maker\n")
        # Print the staff in its current condition
        parse.printStaff()
        try:
            selection = int(raw_input("\nSelection: "))
            # This clause is for modifying existing notes, or harmonizing them
            if selection == 1 or selection == 2:
                if selection == 1:
                    print("Please select a note to modify...")
                else:
                    print("Please select a note to harmonize...")
                # Select the bar from which to pick a note
                barNo = -1
                maxBars = len(parse.staff.notes)
                while barNo not in range(1, maxBars + 1):
                    print("Enter the bar of the note [1 - %d]:\n" % maxBars)
                    barNo = int(raw_input())
                # Select a note from that specified bar
                maxNotes = len(parse.staff.notes[barNo - 1])
                if maxNotes > 1:
                    noteNo = -1
                    while noteNo not in range(1, maxNotes + 1):
                        print("Enter the number of the note in the bar [1 - %d]:\n"
                              % maxNotes)
                        noteNo = int(raw_input())
                else:
                    noteNo = 1
                # Enter a pitch for the note or harmony
                if selection == 1:
                    pitch = raw_input(
                        "Enter the new pitch of the note [cb1 - g#2] or 'rest':\n")
                else:
                    pitch = raw_input(
                        "Enter the new pitch of the note [cb1 - g#2]")
                # If harmonizing, the duration must be the same as the note to
                # be harmonized. Otherwise, specify the new duration
                if selection == 1:
                    duration = raw_input(
                        "Enter the new duration of the note (1/16 - 3/2):\n")
                    parse.modifyNote(barNo, noteNo, pitch, duration)
                else:
                    parse.addHarmony(barNo, noteNo, pitch)
            # This clause is for editing the title, author, length and time signature
            elif selection == 3:
                title = raw_input("Please enter the song title:\n")
                author = raw_input("Please enter the song author:\n")
                length = int(raw_input("Please enter the number of bars in the song:\n"))
                time = raw_input("Please enter the time signature of the song:\n")
                parse.editInfo(title, author, time, length)
            # Adding lyrics
            elif selection == 4:
                lyrics = raw_input(
                    "Please enter the lyrics on a single line (syllables separated by '-', words by a space)\n")
                buf = StringIO.StringIO(lyrics)
                parse.handleLyrics(buf)
                buf.close()
            # Saving the sheet music into a file
            elif selection == 5:
                f = open("data/SheetMusicMaker_Output.txt", "w")
                parse.printStaff(f)
                f.close()
                print("Sheet music written to data/SheetMusicMaker_Output.txt\n")
            # If an invalid selection was made, raise an error to go to the except-clause
            elif selection != 6:
                raise IOError
        except:
            # Display what kind of error was raised
            print("Invalid input:", sys.exc_info()[0])
            print("Try again.")
            selection = 0
    # Exit the program on choice No. 6
    print("Exiting the program.")
    sheet.close()
class Backlog:
    SPACE_KEY = str(os.getenv('SPACE_KEY'))
    API_KEY = os.getenv('API_KEY')
    COUNT_PER_API = 100

    def __init__(self, project_id):
        config = BacklogComConfigure(space_key=self.SPACE_KEY,
                                     api_key=self.API_KEY)
        self.issue_api = MyIssue(config)
        self.issue_attachment_api = MyIssueAttachment(config)
        self.issue_comment_api = IssueComment(config)
        self.project_api = Project(config)
        self.sharedfile_api = MySharedFile(config)
        self.user_api = MyUser(config)
        self.wiki_api = Wiki(config)
        self.wiki_attachment_api = MyWikiAttachment(config)
        self.parse = Parse()
        self.project_id = project_id

    def _add_file(self, func, data, data_type, **kwargs):
        try:
            logger.debug(f"[DEBUG]Started processing {data_type} ID: {data['id']}")
            filepath, response = func(**kwargs)
            logger.info(f'[INFO]Saved {data_type}({data["id"]}): {filepath}')
            data['path'] = filepath
        except Exception as e:
            logger.error(
                f"Could not fetch {data_type} ID:{data['id']}. Error message: {e}")

    def _add_user_icon(self, obj, users_icon):
        if 'createdUser' in obj and obj['createdUser']['id'] in users_icon:
            obj['createdUser']['icon'] = users_icon[obj['createdUser']['id']]
        if 'updatedUser' in obj and obj['updatedUser']['id'] in users_icon:
            obj['updatedUser']['icon'] = users_icon[obj['updatedUser']['id']]
        return

    def _check_status(self, response):
        res = self._convert_res_to_dict(response)
        if response.status_code == 200:
            return res
        else:
            logger.error(res)
            raise EnvironmentError("[ERROR]Could not fetch data from the API...")

    def _convert_res_to_dict(self, response):
        """ Extract the needed data from the response and convert it to a dict """
        logger.debug(f"[DEBUG]Response:{response.content}")
        try:
            return json.loads(
                response.content.decode('utf-8', errors='replace'))
        except Exception:
            logger.error("[ERROR]Could not decode the response; skipping.")

    def _convert_image_link(self, txt, path):
        """ Replace image notation in the text with img tags """
        if txt is not None:
            # FIXME: this could probably be done with re.sub alone...
            filenames = re.findall(r'!\[image\]\[(.*)\]', txt)
            if filenames:
                for filename in filenames:
                    file_image = f'<img src="../{path}{filename}" class="loom-internal-image">'
                    txt = re.sub(r'!\[image\]\[(.*)\]', file_image, txt, 1)
        return txt

    def _create_issue_template_data(self, issues, users_icon):
        # Add user icons to issues and their comments, convert text to
        # markdown, and fetch attachments
        for issue in issues:
            logger.debug(f"[DEBUG]Started processing issue ID: {issue['id']}")
            self._add_user_icon(issue, users_icon)
            path = f"./output/issues/{issue['id']}/"
            os.makedirs(path, exist_ok=True)
            description = self._convert_image_link(issue['description'], path)
            issue['description'] = self.parse.to_markdown(description)
            issue['comments'] = self.get_issue_comments(issue['id'])
            for comment in issue['comments']:
                logger.debug(f"[DEBUG]Started processing comment ID: {comment['id']}")
                self._add_user_icon(comment, users_icon)
                content = self._convert_image_link(comment['content'], path)
                comment['content'] = self.parse.to_markdown(content)
            logger.info(f'[INFO]Get comments of issue. IssueID: {issue["id"]}')
            for attachment in issue['attachments']:
                param = {
                    "issue_id_or_key": issue['id'],
                    "attachment_id": attachment['id'],
                    "download_path": path
                }
                self._add_file(self.issue_attachment_api.get_issue_attachment,
                               attachment, 'issue_attachment', **param)
            for shared_file in issue['sharedFiles']:
                param = {
                    "project_id_or_key": self.project_id,
                    "shared_file_id": shared_file['id'],
                    "download_path": path
                }
                self._add_file(self.sharedfile_api.get_file, shared_file,
                               'issue_shared_file', **param)
        return issues

    def _create_wiki_template_data(self, wikis, users_icon):
        # Add user icons to wikis, convert text to markdown, and fetch attachments
        for wiki in wikis:
            logger.debug(f"[DEBUG]Started processing wiki ID: {wiki['id']}")
            self._add_user_icon(wiki, users_icon)
            path = f"./output/wikis/{wiki['id']}/"
            os.makedirs(path, exist_ok=True)
            content = self._convert_image_link(wiki['content'], path)
            wiki['content'] = self.parse.to_markdown(content)
            for attachment in wiki['attachments']:
                param = {
                    "wiki_id": wiki['id'],
                    "attachment_id": attachment['id'],
                    "download_path": path
                }
                self._add_file(
                    self.wiki_attachment_api.get_wiki_page_attachment,
                    attachment, 'wiki_attachment', **param)
            for shared_file in wiki['sharedFiles']:
                param = {
                    "project_id_or_key": self.project_id,
                    "shared_file_id": shared_file['id'],
                    "download_path": path
                }
                self._add_file(self.sharedfile_api.get_file, shared_file,
                               'wiki_shared_file', **param)
        return wikis

    def get_users(self):
        response = self.user_api.get_user_list()
        return self._check_status(response)

    def get_user_icon(self, user_id, download_path=None):
        filepath, response = self.user_api.get_user_icon(
            user_id=user_id, download_path=download_path)
        logger.info(f'[INFO]Saved user icon: {filepath}')
        if response.status_code == 200:
            return filepath, response
        else:
            logger.warning(
                f"[WARNING]Could not fetch the icon for user ID: {user_id}.")

    def get_issues(self):
        response = self.issue_api.get_issue_list()
        return self._check_status(response)

    def get_project(self):
        response = self.project_api.get_project(
            project_id_or_key=self.project_id)
        return self._check_status(response)

    def get_projects(self):
        response = self.project_api.get_project_list()
        return self._check_status(response)

    def get_project_icon(self):
        filepath, response = self.project_api.get_project_icon(
            project_id_or_key=self.project_id)
        logger.info(f'[INFO]Saved project icon: {filepath}')
        if response.status_code == 200:
            return filepath, response
        else:
            logger.warning(
                f"[WARNING]Could not fetch the icon for project ID: {self.project_id}.")

    def get_project_issues(self):
        count_response = self.issue_api.count_issue(project_id=self.project_id)
        count_res = self._check_status(count_response)
        file_count = math.ceil(count_res["count"] / self.COUNT_PER_API)
        issues = []
        for i in range(file_count):
            response = self.issue_api.get_issue_list(
                project_id=self.project_id,
                count=self.COUNT_PER_API,
                offset=i * self.COUNT_PER_API)
            res = self._check_status(response)
            issues += res
        return issues

    def get_project_users(self):
        response = self.project_api.get_project_user_list(
            project_id_or_key=self.project_id)
        return self._check_status(response)

    def get_issue_comments(self, issue_id):
        response = self.issue_comment_api.get_comment_list(
            issue_id_or_key=issue_id,
            count=self.COUNT_PER_API,
        )
        return self._check_status(response)

    def get_wiki_page_list(self):
        response = self.wiki_api.get_wiki_page_list(
            project_id_or_key=self.project_id)
        # Wikis in the list do not include their content, so fetch each one again
        wikis_list = self._check_status(response)
        return [
            self._convert_res_to_dict(self.wiki_api.get_wiki_page(wiki['id']))
            for wiki in wikis_list
        ]

    def get_project_data(self):
        """ Fetch and shape the API data """
        if not self.SPACE_KEY or not self.API_KEY:
            raise AttributeError("Could not read the space key or the API key.")
        if not self.project_id:
            raise AttributeError("Please pass a project ID as an argument.")
        project = self.get_project()
        logger.info('[INFO]Get project data')
        issues = self.get_project_issues()
        logger.info('[INFO]Get project issues')
        wikis = self.get_wiki_page_list()
        logger.info('[INFO]Get project wiki')
        users = self.get_project_users()
        logger.info('[INFO]Get project users')
        # Add the icon to the project data
        filepath, response = self.get_project_icon()
        # FIXME: replace based on an argument instead of hard-coding /output
        project['icon'] = filepath.replace('/output', '')
        # Build a dict of user icons keyed by user ID
        users_icon = {}
        for user in users:
            logger.debug(f"[DEBUG]Started processing user ID: {user['id']}")
            path = f"./output/users/{user['id']}/"
            os.makedirs(path, exist_ok=True)
            try:
                filepath, response = self.get_user_icon(user['id'],
                                                        download_path=path)
                users_icon[user['id']] = filepath
            except Exception as e:
                logger.warning(
                    f"[WARNING]Could not fetch the user icon for user ID:{user['id']}. Error message: {e}")
        issues = self._create_issue_template_data(issues, users_icon)
        wikis = self._create_wiki_template_data(wikis, users_icon)
        return project, issues, wikis, users
def __init__(self, name=None, **kwargs):
    super().__init__(name=name, **kwargs)
    self.headers = settings.Headers
    self.Parse = Parse()
import logging

from parse import Parse

if __name__ == '__main__':
    logging.basicConfig(
        level="DEBUG",
        filename="/var/log/dp_more.log",
        format="%(asctime)s[%(levelname)s][%(filename)s.%(funcName)s]%(message)s")
    Parse().parse_all_info()
def main():
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('infile', nargs='?', type=str, default='-',
                        help="raw data file (stdin by default)")
    parser.add_argument('-o', '--outfile',
                        type=lambda x: check_file_not_exists(parser, x),
                        default=sys.stdout, metavar='FILE',
                        help="redirect output to a file")
    parser.add_argument('-b', '--baseline', type=str, default="0:20", metavar='A:B',
                        help='a range for baseline samples (int means from the beginning, negative int means from the end)')
    parser.add_argument('-s', '--signal', type=str, default=None, metavar='C:D',
                        help='a range for signal samples to integrate (default: from baseline to the end, for negative baseline is from 0 to baseline)')
    parser.add_argument('--skip', type=int, default=0, metavar='N',
                        help='skip first N events (useful if first timestamps are not consequent)')
    parser.add_argument('--csv', action='store_true',
                        help='output as a .csv')
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--save-bl', action='store_true',
                        help='save calculated baseline value')
    parser.add_argument('--save-bl-var', action='store_true',
                        help='save baseline standard deviation (to estimate noise)')
    parser.add_argument('--save-min', action='store_true',
                        help='save the value of absolute minimum')
    parser.add_argument('--save-max', action='store_true',
                        help='save the value and the bin number of absolute maximum')
    parser.add_argument('--save-max-idx', action='store_true',
                        help='save sample index of the maximum value')
    parser.add_argument('--save-len', action='store_true',
                        help='save a number of values more than baseline around the maximum')
    parser.add_argument('--progress', action='store_true',
                        help="print progress to stderr")
    args = parser.parse_args()

    global debug, nevents
    # --debug, --outfile
    debug = args.debug
    outfile = args.outfile
    # --baseline, --signal
    # acceptable values: "0:10", "10", "-10", "-10:-5"
    rbaseline = map(int, str.split(args.baseline, ':'))
    if len(rbaseline) > 2:
        raise ValueError('rbaseline is a:b')
    if len(rbaseline) == 1:
        rbaseline = [0] + rbaseline  # [10] -> [0,10]
    if args.signal:
        rsignal = map(int, str.split(args.signal, ':'))
        if len(rsignal) == 1:
            rsignal = [rbaseline[1]] + rsignal
    else:
        rsignal = (rbaseline[1], None)  # from just after baseline to the end
    # --infile
    infile = sys.stdin
    if args.infile != '-':
        try:
            infile = io.open(args.infile, 'rb')
        except IOError as e:
            sys.stderr.write('Err: ' + e.strerror + ': "' + e.filename + '"\n')
            exit(e.errno)
    splitter = '\t'
    # --csv
    if args.csv:
        splitter = ';'
    # --skip
    skip = args.skip
    # Init parser
    try:
        p = Parse(infile)
    except ValueError as e:
        sys.stderr.write("Err: %s \n" % e)
        exit(1)
    # Catch Ctrl+C
    signal.signal(signal.SIGINT, fin)
    # Parse events
    nevents = 0
    fields = list(Features._fields)
    if not args.save_max:
        fields.remove('max')
    if not args.save_min:
        fields.remove('min')
    if not args.save_max_idx:
        fields.remove('max_idx')
    if not args.save_bl:
        fields.remove('bl')
    if not args.save_bl_var:
        fields.remove('bl_var')
    if not args.save_len:
        fields.remove('len')
    # Print file header
    outfile.write('# ' + splitter.join(fields) + '\n')
    for event in p:
        nevents += 1
        if skip and nevents <= skip:
            continue
        vals = rintegrate(event, rbaseline, rsignal, fields)
        if args.progress and (nevents % 10000 == 0):
            sys.stderr.write("progress: {0:.1f}%\r".format(100.0 * p.progress()))
        vals = [getattr(vals, k) for k in fields]
        line = splitter.join(map(str, vals))
        outfile.write(line + '\n')
    sys.stderr.write("{}: ".format(outfile.name))
    fin()
def _company_info(self, company_name, api_key=""):
    #TODO - company_name = self._remove_non_ascii(company_name) add to save
    qry = {'where': json.dumps({'company_name': company_name}), 'limit': 1000}
    qry['order'] = '-createdAt'
    crawls = Parse().get('CompanyInfoCrawl', qry).json()['results']
    if not crawls:
        # start crawls
        return company_name
    crawls = self._source_score(pd.DataFrame(crawls))
    crawls = self._logo_score(crawls)
    #crawls = crawls[crawls.api_key == api_key]
    crawls['name_score'] = [fuzz.token_sort_ratio(row['name'], row.company_name)
                            for index, row in crawls.iterrows()]
    crawls = crawls[crawls.name_score > 70].append(crawls[crawls.name.isnull()])
    logo = crawls.sort("logo_score", ascending=False)
    #logo = logo[(logo.logo != "") & (logo.logo.notnull())][["source", "logo"]]
    logo = logo[(logo.logo != "") & (logo.logo.notnull())].logo.tolist()
    logo = logo[0] if logo else ""
    #crawls = crawls[["press", 'source_score', 'source', 'createdAt', 'domain']]
    final = {}
    #print crawls.press.dropna()
    for col in crawls.columns:
        if col in ['source_score', 'source', 'createdAt']:
            continue
        df = crawls[[col, 'source_score', 'source', 'createdAt']]
        if df[col].dropna().empty:
            continue
        # lists are unhashable, so convert to tuples before de-duplicating
        if isinstance(list(df[col].dropna())[0], list):
            df[col] = df[col].dropna().apply(tuple)
        try:
            df = df[df[col] != ""]
        except Exception:
            pass
        try:
            df = df[df[col].notnull()]
            df = [source[1].sort('createdAt').drop_duplicates(col, True)
                  for source in df.groupby(col)]
            df = [_df for _df in df if _df is not None]
            df = [pd.DataFrame(columns=['source_score', col])] if len(df) == 0 else df
            # keep the value reported by the highest-scoring source
            df = pd.concat(df).sort('source_score')[col]
            if list(df):
                final[col] = list(df)[-1]
        except Exception:
            pass
    if 'industry' in final:
        try:
            final['industry'] = final['industry'][0]
        except Exception:
            final['industry'] = ""
        try:
            final['industry_keywords'] = list(set(crawls.industry.dropna().sum()))
        except Exception:
            final['industry_keywords'] = []
    if 'address' in final:
        final['address'] = FullContact()._normalize_location(final['address'])
    try:
        final['handles'] = crawls[['source', 'handle']].dropna()
        final['handles'] = final['handles'].drop_duplicates().to_dict('r')
    except Exception:
        pass
    tmp = crawls[['source', 'logo']].dropna()
    #print tmp
    #print "THE LOGO", logo
    final["logo"] = logo
    final['logos'] = tmp.drop_duplicates().to_dict('r')
    try:
        tmp = crawls[['source', 'phone']].dropna()
        final['phones'] = tmp.drop_duplicates().to_dict('r')
    except Exception:
        pass
    # TODO - if company_name exists update
    # TODO - find if domain exists under a different company_name, then update
    final = self._prettify_fields(final)
    if "name_score" in final:
        del final["name_score"]
    #print json.dumps(final)
    self._add_to_clearspark_db('Company', 'company_name', company_name, final)
    # TODO - find main domain from domain -> ie canon.ca should be canon.com
    # clean data - ie titleify fields, and lowercase domain
    # TODO - start a domain search with the deduced domain and the company_name
    #print "RQUEUE CHECK"
    if "domain" in final:
        domain = final["domain"]
        '''
        if len(RQueue()._results("{0}_{1}".format(company_name, api_key))) == 1:
            q.enqueue(Companies()._domain_research, domain, api_key, company_name)
            q.enqueue(Companies()._secondary_research, company_name, domain, api_key)
        '''
        if RQueue()._has_completed("{0}_{1}".format(company_name, api_key)):
            #q.enqueue(Companies()._domain_research, domain, api_key, company_name)
            #q.enqueue(Companies()._secondary_research, company_name, domain, api_key)
            print "WEBHOOK <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
            if "company_name" in final:
                Webhook()._update_company_info(final)
    '''
    job = q.enqueue(EmailGuess().search_sources, final["domain"], api_key, "")
    job.meta["{0}_{1}".format(company_name, api_key)] = True
    job.save()
    for domain in crawls.domain.dropna().drop_duplicates():
        job = q.enqueue(EmailGuess().search_sources, domain, api_key, "")
        RQueue()._meta(job, "{0}_{1}".format(company_name, api_key))
    '''
    return final
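# A minimal, self-contained sketch of the fuzzy name-match filter used in
# _company_info above, assuming fuzzywuzzy and pandas; the sample rows are
# illustrative, while the 70-point cutoff mirrors the snippet.
import pandas as pd
from fuzzywuzzy import fuzz

crawls = pd.DataFrame({
    'company_name': ['Canon', 'Canon', 'Canon'],
    'name': ['Canon Inc.', 'Canon Canada', 'Unrelated Widgets Ltd'],
})
crawls['name_score'] = [fuzz.token_sort_ratio(row['name'], row['company_name'])
                        for _, row in crawls.iterrows()]
# Keep only crawled rows whose reported name plausibly matches the query.
matched = crawls[crawls.name_score > 70]
print(matched[['name', 'name_score']])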
def add(name, paths):
    snapshot.set(name)
    for path in each([p for p in paths]):
        logger.info("Adding file %s to snapshot %s" % (path, name))
        Parse.parser(path)
class Collector(DatagramServer):

    x = 0

    def __init__(self, args):
        # create logger
        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(logging.DEBUG)
        # create console handler and set level to debug
        ch = logging.StreamHandler()
        ch.setLevel(logging.DEBUG)
        # create formatter
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        # add formatter to ch
        ch.setFormatter(formatter)
        # add ch to logger
        self.logger.addHandler(ch)
        self.logger.debug("Starting Collector process in %s" % os.getcwd())
        #self.logger.debug("Gevent Version %s" % gevent.__version__)
        #TODO: move output file name to config
        fname = "./NetFlow.%d.bin" % int(time.time())
        #WARN: might want to remove this after testing
        self.out = open(fname, "wb")
        # create tool instances
        self.interface = Interface()
        self.parse = Parse()
        self.context = Context()
        self.describe = Describe()
        self.standardize = Standardize()
        self.transform = Transform()
        self.partition = Partition()
        self.q = Queue()
        self.inWindow = settings.SETTINGS.get("collector", "inWindow")
        self.score = Score()
        #TODO: move csv name to config
        #self.csv = CSV("output.csv")
        self.output = Output()
        return super(Collector, self).__init__(args)

    def done(self):
        self.out.close()
        # really important to call del on the csv obj to ensure it closes correctly
        #del self.csv

    def handle(self, rawData, address):
        Collector.x += 1
        #print '%s %s: got %r' % (Collector.x, address[0], rawData)
        self.out.write(rawData)
        interfacedData = self.interface.run(rawData)
        #self.logger.debug("Interface: %s" % repr(interfacedData))
        # once the rawData is "interfaced" we are passing it around by reference;
        # interfaced data must be iterable
        try:
            for record in interfacedData:
                self.parse.run(record)
                #self.logger.debug("Parse: %s" % repr(record))
                self.context.run(record)
                #self.logger.debug("Context: %s" % repr(record))
                self.describe.run(record)
                #self.logger.debug("Describe: %s" % repr(record))
                # push the record onto the queue until the describe window is met
                if not self.inWindow:
                    self.q.put(record)
                    #self.logger.debug("adding record to queue %s" % repr(record))
                    if self.q.qsize() == int(settings.SETTINGS.get("collector", "describeWindow")):
                        #self.logger.debug("Describe Window of %s records met, Begin Processing queue" % settings.SETTINGS.get("collector", "describeWindow"))
                        self.inWindow = True
                        while not self.q.empty():
                            item = self.q.get()
                            #self.logger.debug("processing record from queue %s" % repr(item))
                            self.standardize.run(item)
                            self.transform.run(item)
                            self.partition.run(item)
                            #self.csv.writeRow(self.csv.format(item))
                            self.output.run(item)
                            self.q.task_done()
                else:
                    self.standardize.run(record)
                    #self.logger.debug("Standardize: %s" % repr(record))
                    self.transform.run(record)
                    #self.logger.debug("Transform: %s" % repr(record))
                    self.partition.run(record)
                    #self.logger.debug("Partition: %s" % repr(record))
                    #self.csv.writeRow(self.csv.format(record))
                    self.output.run(record)
                    #self.score.run(record)
        except Exception as e:
            self.logger.error("Interfaced data is not iterable %s" % str(e))
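# A minimal launch sketch for the Collector above, assuming gevent's
# DatagramServer API; the ':2055' listener address is a hypothetical choice
# (a common NetFlow export port), not taken from the original source.
if __name__ == '__main__':
    collector = Collector(':2055')
    try:
        collector.serve_forever()   # blocks, dispatching datagrams to handle()
    except KeyboardInterrupt:
        collector.done()            # flush and close the raw capture file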
def main(args):
    """
    Example usage: Describe what we do in this file, then give an example
    of a command you might run on the command line.
    $ python parse-dailynews.py
    """
    fn = 'dailynews.new'
    fh = open(fn, 'rb')
    markup = fh.read()
    if markup[:3] != ' ':
        # We have a gunzip'ed file we have to extract. We know this because
        # the first three characters of www.nydailynews.com urls are always ' '.
        # Always. They are always ' '.
        markup = gzip.GzipFile(fn, 'r').read()
    # Results of this parsing are stored in p.content
    regexes = {
        'header': '<header\ id="rh">.*<div\ id="header-container"\ data-reg-role="header-container"></div>\ </header>',
        'footer': '<footer\ id="rf">.*</footer>'
    }
    p = Parse()
    p.regexes = regexes
    p.regex = 'header'
    p.extract_parts(markup)
    p.regex = 'footer'
    p.extract_parts(markup)
    # Turn the nav markup into actionable javascript
    fh = open('html/template-dailynews.js', 'rb')
    js = fh.read()
    # When life was simple:
    #js = js.replace('{{header}}', " ".join(p.content['header'].replace("\n", "\\n").replace("'", "\\'").replace('/', '\/').splitlines()))
    # Life now:
    js = js.replace(
        '{{header}}',
        " ".join(p.content['header']
                 .replace('href="/', 'href="http://www.nydailynews.com/')
                 .replace("http://assets.nydaily", "//www.nydaily")
                 .replace("'", "\\'")
                 .replace('/', '\/')
                 .replace("\n", "\\n")
                 .replace('join("\\n")', 'join("\\\\n")')
                 .replace('/\\n+$', '/\\\\n+$')
                 .replace('rh-app.jpg"', 'rh-app.jpg" alt=""')
                 .replace('rh-subscribe.jpg"', 'rh-subscribe.jpg" alt=""')
                 .replace('notification.png"', 'notification.png" alt=""')
                 .replace("search_action();'></button>", "search_action();'>SEARCH</button>")
                 .splitlines()))
    js = js.replace(
        '{{footer}}',
        p.content['footer']
        .replace('href="/', 'href="http://www.nydailynews.com/')
        .replace('article_750', 'article_250')
        .replace("http://assets.nydaily", "https://www.nydaily")
        .replace("'", "\\'")
        .replace('/', '\/')
        .replace("\n", "\\n")
        .replace('join("\\n")', 'join("\\\\n")')
        .replace('/\\n+$', '/\\\\n+$')
        .replace('7.2945742 --> <style>\n', '7.2945742 --> <style>')
        .replace('\r', ''))
    fh = open('html/head.html', 'rb')
    head_markup = fh.read()
    # Write the files
    if p.content['header'] != '':
        f = FileWrapper('output/header.html')
        f.write(p.content['header'])
        f = FileWrapper('output/header-iframeable.html')
        f.write('%s%s' % (head_markup, p.content['header']))
    if p.content['footer'] != '':
        f = FileWrapper('output/footer.html')
        f.write(p.content['footer'])
        f = FileWrapper('output/footer-iframeable.html')
        f.write('%s%s' % (head_markup, p.content['footer']))
    if p.content['footer'] != '' and p.content['header'] != '':
        f = FileWrapper('output/vendor-include.js')
        f.write(js)
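# A minimal sketch of the header/footer extraction idea above, assuming the
# project's Parse.extract_parts amounts to a DOTALL regex search; plain re is
# used here since the Parse class itself is project code, and the markup and
# patterns are illustrative.
import re

markup = '<header id="rh">NAV</header> body <footer id="rf">LINKS</footer>'
regexes = {
    'header': r'<header id="rh">.*?</header>',
    'footer': r'<footer id="rf">.*?</footer>',
}
content = {}
for name, pattern in regexes.items():
    match = re.search(pattern, markup, re.DOTALL)
    content[name] = match.group(0) if match else ''
print(content['header'])  # -> <header id="rh">NAV</header>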
    # 'icebox_features': {'target': planning_nextup_id, 'filters': {'filter': 'id:43068683,41430591,33693103,33691827,32161881,37317209'}},
    # 'icebox_bugs': {'target': bugs_inbox_id, 'filters': {'filter': 'id:41753701,38746909'}}
}
print '############################'
movement = movements['current_inprogress2']
stories = pivotal.getStories(movement['filters'])
# print stories['stories']['story']
target_list = trello.getList(movement['target'])
print target_list.getListInformation()
parse = Parse()
i = 1
for story in stories:
    print '========================================== %d' % i
    try:
        data = parse.parseData(story)
        print data
        card = trello.addCard(target_list, data)
        print card.getCardInformation()
        i += 1
    except Exception, e:
        print story['id']['#text']
        raise e
# cards = planning_nextup_list.getCards()
# for card in cards:
"""
Main module
Launch program and web server
"""
import sys

from flask import Flask, render_template

from parse import Parse
from features import Features
from classification import Classification

if __name__ == '__main__':
    if len(sys.argv) > 1:
        print 'Parsing...',
        sys.stdout.flush()
        p = Parse(sys.argv[1])
        p.compute_fqdn()
        print 'DONE'
        print 'Computing features (Can take some time because of whois queries)...',
        sys.stdout.flush()
        features = Features(p)
        features.compute()
        print 'DONE'
        print 'Classification...',
        sys.stdout.flush()
        classification = Classification(features, p)
        classification.compute()
        print 'DONE'
    help='Log file name for tegrastats data')
parser.add_argument(
    '--verbose', '-v',
    action='store_true',
    help='Prints verbose messages while running tegrastats')
parser.add_argument(
    '--only_parse', '-p',
    action='store_true',
    help='Parse tegrastats log file without running tegrastats')
parser.add_argument(
    '--graph', '-g',
    action='store_true',
    help='Plots some useful graphs from tegrastats data parsed')

options = parser.parse_args()

tegrastats = Tegrastats(options.interval, options.log_file, options.verbose)
parser = Parse(options.interval, options.log_file)

if not options.only_parse:
    status = tegrastats.run()

csv_file = parser.parse_file()

if options.graph:
    graph = Graph(csv_file)
    graph.plots()
if __name__ == '__main__':
    time1 = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-u', "--url",
        type=str,
        default='https://www.qcsanbao.cn/webqcba/DVMProducerServlet?method=getWhereList&p=1',
        help="URL of the site to crawl")
    args = parser.parse_args()
    url = args.url
    base_url = configs["basic_url"]
    r = get_redis_connect()
    dl = Download()
    par = Parse()
    # Build the url_list for the listing pages
    make_url_list(
        base_url,
        par.parse_main_page_get_total_pagenum(
            dl.download_first_page(url, logger), configs["test"]))
    threading_list = []
    # Parse the detail-page URLs out of the listing pages, store them in redis,
    # and download the listing-page HTML
    threading_list.extend([
        Thread(target=download_and_parse_page,
               args=("url_list", r, par.parse_main_page_get_detail_page_url,
                     dl.download_first_page, dl.download_list_page_html,
                     lock, logger))
        for _ in range(configs["thread_num"])
    ])
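# A minimal sketch of the redis-backed work queue the crawler above relies
# on, assuming URLs are pushed with lpush and popped with brpop; the key name
# "url_list" mirrors the snippet, the worker count and processing are
# illustrative.
import redis
from threading import Thread

r = redis.Redis()

def worker():
    while True:
        item = r.brpop('url_list', timeout=5)  # block until a URL arrives
        if item is None:
            break                              # queue drained, stop this worker
        _key, url = item
        print('processing %s' % url.decode('utf-8'))

threads = [Thread(target=worker) for _ in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()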
class MyHandler(FileSystemEventHandler):
    def on_modified(self, event):
        self.parse = Parse()
        if event.src_path.endswith(".log"):
            self.parse.set_file_path(event.src_path)
            self.parse.parse_file()
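# A minimal sketch wiring MyHandler into a watchdog Observer, assuming the
# standard watchdog API; the './logs' directory is an illustrative choice.
import time
from watchdog.observers import Observer

observer = Observer()
observer.schedule(MyHandler(), path='./logs', recursive=False)
observer.start()
try:
    while True:
        time.sleep(1)   # keep the main thread alive while events stream in
except KeyboardInterrupt:
    observer.stop()
observer.join()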
def testRests(self):
    sheet = open('data/rests.txt', 'r')
    parse = Parse(sheet)
    parse.printStaff()
    sheet.close()
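# A minimal harness sketch for the test above, assuming testRests belongs to
# a unittest.TestCase subclass (the enclosing class is not shown here) and
# that Parse is importable from the project under test; the with-statement
# replaces the manual close.
import unittest
from parse import Parse

class ParseRestsTest(unittest.TestCase):
    def testRests(self):
        with open('data/rests.txt', 'r') as sheet:
            parse = Parse(sheet)
            parse.printStaff()

if __name__ == '__main__':
    unittest.main()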