def test_prev(self, simple):
    """ Test prev property """
    # self.TEI.parse()

    # Normal passage checking
    p = self.TEI.getTextualNode(["2", "40", "8"], simple=simple)
    self.assertEqual(str(p.prev.reference), "2.40.7")
    p = self.TEI.getTextualNode(["2", "40"], simple=simple)
    self.assertEqual(str(p.prev.reference), "2.39")
    p = self.TEI.getTextualNode(["2"], simple=simple)
    self.assertEqual(str(p.prev.reference), "1")

    # Test failing passage
    p = self.TEI.getTextualNode(["1", "pr", "1"], simple=simple)
    self.assertEqual(p.prev, None)
    p = self.TEI.getTextualNode(["1", "pr"], simple=simple)
    self.assertEqual(p.prev, None)
    p = self.TEI.getTextualNode(["1"], simple=simple)
    self.assertEqual(p.prev, None)

    # First child should get to parent's prev last child
    p = self.TEI.getTextualNode(["1", "1", "1"], simple=simple)
    self.assertEqual(str(p.prev.reference), "1.pr.22")

    # Beginning of lowest level passage and beginning of parent level
    p = self.TEI.getTextualNode(["2", "pr", "sa"], simple=simple)
    self.assertEqual(str(p.prev.reference), "1.39.8")
def test_scan_video_invalid_extension(movies, tmpdir, monkeypatch):
    monkeypatch.chdir(str(tmpdir))
    movie_name = os.path.splitext(movies['man_of_steel'].name)[0] + '.mp3'
    tmpdir.ensure(movie_name)
    with pytest.raises(ValueError) as excinfo:
        scan_video(movie_name)
    assert str(excinfo.value) == '.mp3 is not a valid video extension'
def build_uri(self, base, matches):
    if not base:
        return None
    if self.uriTemplate:
        expanded = str(self.uriTemplate)
    elif self.fragmentTemplate:
        if "#" in base:
            base += self.space.fragmentSeparator
        else:
            base += "#"
        expanded = base + str(self.fragmentTemplate)
    else:
        return None
    expanded = expanded.replace("{+base}", base)
    for var, value in matches.items():
        slug = self.transform_value(value)
        expanded = expanded.replace("{%s}" % var, slug)
    # If base is eg "http://localhost/res/" and expanded is a
    # /-prefixed relative uri like "/sfs/9999:998", urljoin
    # results in "http://localhost/sfs/9999:998", not
    # "http://localhost/res/sfs/9999:998" like you'd expect. So we
    # work around it.
    if expanded[0] == "/":
        expanded = expanded[1:]
    if expanded.startswith("http://") or expanded.startswith("https://"):
        return urljoin(base, expanded)
    else:
        # see the test integrationLegalURI.CustomCoinstruct.test_1845_50_s.1
        return "%s/%s" % (base, expanded)
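# Illustrative sketch (not part of the original module): the stdlib urljoin
# behaviour that the workaround in build_uri() guards against. The URL values
# here are hypothetical.
def _demo_urljoin_pitfall():
    from urllib.parse import urljoin
    base = "http://localhost/res/"
    # An absolute (/-prefixed) path replaces the base path entirely...
    assert urljoin(base, "/sfs/9999:998") == "http://localhost/sfs/9999:998"
    # ...while a relative path is appended to it, which is what build_uri wants.
    assert urljoin(base, "sfs/9999:998") == "http://localhost/res/sfs/9999:998"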
def config_string(self):
    """Generate a '|' delimited string of instance attributes, for saving to config.ini."""
    return '|'.join([
        self.name, self.url, self.api_key, self.cat_ids,
        str(int(self.enabled)),
        self.search_mode,
        str(int(self.search_fallback)),
        str(int(self.enable_daily)),
        str(int(self.enable_backlog)),
        str(int(self.enable_manualsearch))
    ])
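# Illustration only (hypothetical attribute values): the serialized provider
# line stores booleans as "0"/"1", e.g.
#
#   "SomeProvider|https://example.org/|s3cret|5030,5040|1|eponly|0|1|1|0"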
def select(self, query, format="sparql"):
    # FIXME: workaround for the fact that rdflib select uses FROM
    # <%s> differently than Sesame/Fuseki. We remove the 'FROM
    # <%s>' part from the query and instead get a context graph
    # for the same URI.
    re_fromgraph = re.compile(r" FROM <(?P<graphuri>[^>]+)> ")
    graphuri = None
    m = re_fromgraph.search(query)
    if m:
        graphuri = m.group("graphuri")
        query = re_fromgraph.sub(" ", query)
    try:
        res = self._getcontextgraph(graphuri).query(query)
    except pyparsing.ParseException as e:
        raise errors.SparqlError(e)
    if format == "sparql":
        return res.serialize(format="xml")
    elif format == "json":
        return res.serialize(format="json")
    else:
        # or just
        # return self._sparql_results_to_list(res.serialize(format="xml"))
        l = []
        for r in res.bindings:
            d = {}
            for (key, val) in r.items():
                d[str(key)] = str(val)
            l.append(d)
        return l
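# Illustrative sketch (hypothetical query) of the FROM-clause rewrite that
# select() performs before the query reaches rdflib, using the same regex:
def _demo_fromgraph_rewrite():
    import re
    re_fromgraph = re.compile(r" FROM <(?P<graphuri>[^>]+)> ")
    q = "SELECT ?s FROM <http://example.org/ctx> WHERE { ?s ?p ?o }"
    m = re_fromgraph.search(q)
    # The graph URI is captured and handled separately as a context graph...
    assert m.group("graphuri") == "http://example.org/ctx"
    # ...while the FROM clause itself is stripped from the query string.
    assert re_fromgraph.sub(" ", q) == "SELECT ?s WHERE { ?s ?p ?o }"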
def getprevnext(passage, request_urn, output=XML):
    _prev = ""
    _next = ""

    if passage.prev:
        _prev = URN("{}:{}".format(passage.urn.upTo(URN.VERSION), str(passage.prev)))
    if passage.next:
        _next = URN("{}:{}".format(passage.urn.upTo(URN.VERSION), str(passage.next)))

    if output == XML:
        return """
        <GetPrevNext xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns="http://chs.harvard.edu/xmlns/cts">
            <request>
                <requestName>GetPrevNext</requestName>
                <requestUrn>{request_urn}</requestUrn>
            </request>
            <reply>
                <urn>{full_urn}</urn>
                <prevnext>
                    <prev><urn>{prev}</urn></prev>
                    <next><urn>{next}</urn></next>
                </prevnext>
            </reply>
        </GetPrevNext>""".format(
            request_urn=request_urn,
            full_urn=str(passage.urn),
            prev=str(_prev),
            next=str(_next)
        )
def test_prevnext_on_close_to_last_passage(self):
    passage = self.text.getPassage(MyCapytain.common.reference.Reference("2.39.2-2.40.5"))
    self.assertEqual(
        str(passage.nextId), "2.40.6-2.40.8",
        "Next reff should finish at the end of the text, no matter the length of the reference",
    )
    self.assertEqual(str(passage.prevId), "2.37.9-2.39.1", "Prev reff should be the same length as sibling")
def test_first_list(self):
    passage = self.text.getPassage(MyCapytain.common.reference.Reference("2.39"))
    self.assertEqual(str(passage.firstId), "2.39.1", "First reff should be the first")
    self.assertEqual(str(passage.lastId), "2.39.2", "Last reff should be the last")

    passage = self.text.getPassage(MyCapytain.common.reference.Reference("2.39-2.40"))
    self.assertEqual(str(passage.firstId), "2.39.1", "First reff should be the first")
    self.assertEqual(str(passage.lastId), "2.40.8", "Last reff should be the last")
def test_prevnext_on_close_to_first_passage(self):
    passage = self.text.getPassage(MyCapytain.common.reference.Reference("1.pr.10-1.2.1"))
    self.assertEqual(str(passage.nextId), "1.2.2-1.4.1", "Next reff should be the same length as sibling")
    self.assertEqual(
        str(passage.prevId), "1.pr.1-1.pr.9",
        "Prev reff should start at the beginning of the text, no matter the length of the reference",
    )
def _load_resources(self, resource_path):
    # Returns a mapping [resource label] => [resource uri].
    # resource_path is given relative to cwd.
    graph = Graph()
    graph.load(resource_path, format='n3')
    d = {}
    for uri, label in graph.subject_objects(RDFS.label):
        d[str(label)] = str(uri)
    return d
def test_highest(self):
    self.assertEqual(
        str(Reference("1.1-1.2.8").highest), "1.1",
        "1.1 is higher"
    )
    self.assertEqual(
        str(Reference("1.1-2").highest), "2",
        "2 is higher"
    )
def test_properties(self):
    a = Reference("1.1@Achilles-1.10@Atreus[3]")
    self.assertEqual(str(a.start), "1.1@Achilles")
    self.assertEqual(a.start.list, ["1", "1"])
    self.assertEqual(a.start.subreference[0], "Achilles")
    self.assertEqual(str(a.end), "1.10@Atreus[3]")
    self.assertEqual(a.end.list, ["1", "10"])
    self.assertEqual(a.end.subreference[1], 3)
    self.assertEqual(a.end.subreference, ("Atreus", 3))
def test_Unicode_Support(self):
    a = Reference("1.1@καὶ[0]-1.10@Ἀλκιβιάδου[3]")
    self.assertEqual(str(a.start), "1.1@καὶ[0]")
    self.assertEqual(a.start.list, ["1", "1"])
    self.assertEqual(a.start.subreference[0], "καὶ")
    self.assertEqual(str(a.end), "1.10@Ἀλκιβιάδου[3]")
    self.assertEqual(a.end.list, ["1", "10"])
    self.assertEqual(a.end.subreference[1], 3)
    self.assertEqual(a.end.subreference, ("Ἀλκιβιάδου", 3))
def test_xml_Work_GetItem(self):
    """ Test access through getItem obj[urn] """
    TI = TextInventory(resource=self.getCapabilities, id="TestInv")
    tg = TI["urn:cts:latinLit:phi1294"]
    self.assertIsInstance(tg["urn:cts:latinLit:phi1294.phi002"], Work)
    self.assertEqual(str(tg["urn:cts:latinLit:phi1294.phi002"].urn), "urn:cts:latinLit:phi1294.phi002")
    self.assertIsInstance(tg["urn:cts:latinLit:phi1294.phi002.perseus-lat2"], Text)
    self.assertEqual(
        str(tg["urn:cts:latinLit:phi1294.phi002.perseus-lat2"].urn),
        "urn:cts:latinLit:phi1294.phi002.perseus-lat2"
    )
def test_str(self):
    a = Citation(name="book", xpath="/tei:div[@n=\"?\"]", scope="/tei:TEI/tei:body/tei:text/tei:div")
    self.assertEqual(
        str(a),
        "<tei:cRefPattern n=\"book\" matchPattern=\"(\\w+)\" replacementPattern=\"#xpath(/tei:TEI/tei:body/tei:text/tei:div/tei:div[@n=\"$1\"])\"><tei:p>This pointer pattern extracts book</tei:p></tei:cRefPattern>"
    )
    b = Citation(name="chapter", xpath="/tei:div[@n=\"?\"]", scope="/tei:TEI/tei:body/tei:text/tei:div/tei:div[@n=\"?\"]")
    self.assertEqual(
        str(b),
        "<tei:cRefPattern n=\"chapter\" matchPattern=\"(\\w+)\\.(\\w+)\" replacementPattern=\"#xpath(/tei:TEI/tei:body/tei:text/tei:div/tei:div[@n=\"$1\"]/tei:div[@n=\"$2\"])\"><tei:p>This pointer pattern extracts chapter</tei:p></tei:cRefPattern>"
    )
    a = Citation()
    self.assertEqual(str(a), "")
def mock_volume(override=None):
    volume_model = {
        "id": str(uuid.uuid4()),
        "sizeInKb": (8 * constants.GIGABYTE) // constants.KILOBYTE,
        "storagePoolId": str(uuid.uuid4()),
        "useRmcache": False,
        "volumeType": constants.VOLUME_TYPE_THICK,
        "mappedSdcInfo": []
    }
    volume_model.update(override or {})
    return volume_model
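# Usage sketch (hypothetical override): any key passed in `override` replaces
# the corresponding default above, and all other defaults are kept:
#
#   volume = mock_volume({"sizeInKb": (16 * constants.GIGABYTE) // constants.KILOBYTE})
#   assert volume["volumeType"] == constants.VOLUME_TYPE_THICK  # default untouched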
def __init__(self, resource):
    self.base = str(resource.value(COIN.base))
    self.fragmentSeparator = str(resource.value(COIN.fragmentSeparator))
    self.slugTransform = SlugTransformer(resource.value(COIN.slugTransform))
    self.templates = [Template(self, template_resource)
                      for template_resource in resource.objects(COIN.template)]
    # Primary sort order by :priority,
    # secondary sort by type specificity (whether a self.forType is specified),
    # tertiary sort order by specificity (number of vars per template).
    self.templates.sort(key=lambda x: (x.priority, x.forType, len(x.bindings)), reverse=True)
def test_run_revsort(self):
    outDir = self._createTempDir()
    self._tester('src/toil/test/cwl/revsort.cwl',
                 'src/toil/test/cwl/revsort-job.json',
                 outDir, {
                     # Having unicode string literals isn't necessary for the assertion but makes for a
                     # less noisy diff in case the assertion fails.
                     u'output': {
                         u'location': "file://" + str(os.path.join(outDir, 'output.txt')),
                         u'basename': str("output.txt"),
                         u'size': 1111,
                         u'class': u'File',
                         u'checksum': u'sha1$b9214658cc453331b62c2282b772a5c063dbd284'}})
def test_particle():
    assert str(Eun) == u'은(는)'
    assert str(Eul) == u'을(를)'
    assert str(Ida) == u'(이)'
    if PY2:
        try:
            __import__('unidecode')
        except ImportError:
            assert repr(Ida) == u"<Particle: u'(\\uc774)'>"
        else:
            assert repr(Ida) == u'<Particle: (i)>'
    else:
        assert repr(Ida) == u'<Particle: (이)>'
def test_get_parent(self):
    a = Reference("1.1")
    b = Reference("1")
    c = Reference("1.1-2.3")
    d = Reference("1.1-1.2")
    e = Reference("1.1@Something[0]-1.2@SomethingElse[2]")
    f = Reference("1-2")

    self.assertEqual(str(a.parent), "1")
    self.assertEqual(b.parent, None)
    self.assertEqual(str(c.parent), "1-2")
    self.assertEqual(str(d.parent), "1")
    self.assertEqual(str(e.parent), "1@Something[0]-1@SomethingElse[2]")
    self.assertEqual(f.parent, None)
def test_prev_prev_next_property(self):
    """ Test reference property

    As of 0.1.0, .next and .prev are URNs
    """
    passage = Passage(
        urn="urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1",
        resource=GET_PASSAGE,
        retriever=self.endpoint
    )
    # When next does not exist from the original resource
    self.assertEqual(str(URN(passage.prevId).reference), "1.pr")
    self.assertEqual(str(URN(passage.nextId).reference), "1.2")
    self.endpoint.getPrevNextUrn.assert_called_with(urn="urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1")
def test_get_passage_hyper_context_double_slash_xpath(self):
    simple = self.seneca.getTextualNode(Reference("1-10"))
    str_simple = simple.export(output=Mimetypes.XML.Std)
    text = Text(
        resource=str_simple,
        citation=self.seneca.citation
    )
    self.assertEqual(
        text.getTextualNode(Reference("1"), simple=True).export(
            output=Mimetypes.PLAINTEXT, exclude=["tei:note"]
        ).strip(),
        "Di coniugales tuque genialis tori,",
        "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
    )
    self.assertEqual(
        text.getTextualNode(Reference("10"), simple=True).export(
            output=Mimetypes.PLAINTEXT
        ).strip(),
        "aversa superis regna manesque impios",
        "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
    )
    self.assertEqual(
        list(map(lambda x: str(x), text.getValidReff(level=1))),
        ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
        "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
    )

    simple = self.seneca.getTextualNode(Reference("1"))
    str_simple = simple.tostring(encoding=str)
    text = Text(
        resource=str_simple,
        citation=self.seneca.citation
    )
    self.assertEqual(
        text.getTextualNode(Reference("1"), simple=True).export(
            output=Mimetypes.PLAINTEXT, exclude=["tei:note"]
        ).strip(),
        "Di coniugales tuque genialis tori,",
        "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
    )
    self.assertEqual(
        list(map(lambda x: str(x), text.getValidReff(level=1))),
        ["1"],
        "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
    )
def getValidReff(self, level=1, reference=None):
    """ Cached method of the original object

    :param level: Citation depth of the references to retrieve
    :param reference: Reference object
    :return: References
    """
    __cachekey__ = _cache_key("Text_GetValidReff", level, str(self.urn), str(reference))
    __cached__ = self.cache.get(__cachekey__)
    if __cached__:
        return __cached__
    else:
        __cached__ = super(Text, self).getValidReff(level, reference)
        self.cache.set(__cachekey__, __cached__, timeout=Text.TIMEOUT["getValidReff"])
        return __cached__
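# Design note on the cache-aside pattern above: `if __cached__:` treats any
# falsy cached value (e.g. an empty list of references) as a cache miss, so
# empty results are recomputed on every call. Checking `is not None` instead
# would cache empties too; whether that matters depends on the corpus.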
def testFindCitation(self):
    self.assertEqual(
        str(self.TEI.citation),
        '<tei:cRefPattern n="book" matchPattern="(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\\\'$1\\\'])"><tei:p>This pointer pattern extracts book</tei:p></tei:cRefPattern>'
    )
    self.assertEqual(
        str(self.TEI.citation.child),
        '<tei:cRefPattern n="poem" matchPattern="(\\w+)\\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\\\'$1\\\']/tei:div[@n=\\\'$2\\\'])"><tei:p>This pointer pattern extracts poem</tei:p></tei:cRefPattern>'
    )
    self.assertEqual(
        str(self.TEI.citation.child.child),
        '<tei:cRefPattern n="line" matchPattern="(\\w+)\\.(\\w+)\\.(\\w+)" replacementPattern="#xpath(/tei:TEI/tei:text/tei:body/tei:div/tei:div[@n=\\\'$1\\\']/tei:div[@n=\\\'$2\\\']/tei:l[@n=\\\'$3\\\'])"><tei:p>This pointer pattern extracts line</tei:p></tei:cRefPattern>'
    )
    self.assertEqual(len(self.TEI.citation), 3)
def assert_cmd_exit_equals(self, cmd, main_func, expected):
    sys.argv = re.sub(' +', ' ', cmd).split(' ')
    try:
        main_func()
        # assert("exit() not called.") was always truthy and could never fail;
        # use self.fail() so the test actually errors when no SystemExit is raised.
        self.fail("exit() not called.")
    except SystemExit as e:
        self.assertEqual(str(e), expected)
def test_get_passage_hypercontext_complex_xpath(self):
    simple = self.text_complex.getTextualNode(Reference("pr.1-1.2"))
    str_simple = simple.tostring(encoding=str)
    text = Text(
        resource=str_simple,
        citation=self.text_complex.citation
    )
    self.assertIn(
        "Pervincis tandem",
        text.getTextualNode(Reference("pr.1"), simple=True).export(
            output=Mimetypes.PLAINTEXT, exclude=["tei:note"]
        ).strip(),
        "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
    )
    self.assertEqual(
        text.getTextualNode(Reference("1.2"), simple=True).export(
            output=Mimetypes.PLAINTEXT
        ).strip(),
        "lusimus quos in Suebae gratiam virgunculae,",
        "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
    )
    self.assertEqual(
        list(map(lambda x: str(x), text.getValidReff(level=2))),
        ["pr.1", "1.1", "1.2"],
        "Ensure passage finding with context is fully TEI / Capitains compliant (Different level range Passage)"
    )
def test_urn(self):
    """ Test setters and getters for urn """

    # Should work with string
    self.TEI.urn = "urn:cts:latinLit:tg.wk.v"
    self.assertEqual(isinstance(self.TEI.urn, MyCapytain.common.reference.URN), True)
    self.assertEqual(str(self.TEI.urn), "urn:cts:latinLit:tg.wk.v")

    # Test for URN
    self.TEI.urn = MyCapytain.common.reference.URN("urn:cts:latinLit:tg.wk.v2")
    self.assertEqual(isinstance(self.TEI.urn, MyCapytain.common.reference.URN), True)
    self.assertEqual(str(self.TEI.urn), "urn:cts:latinLit:tg.wk.v2")

    # Test it fails if not basestring or URN
    with self.assertRaises(TypeError):
        self.TEI.urn = 2
def parse_links(html, encoding=None):
    """Process all links in given html and replace them if markup is added."""
    if encoding is None:
        encoding = settings.DEFAULT_CHARSET

    # The passed HTML may be a string or bytes, depending on what is calling
    # this method. For example, Django response.content is always bytes. We
    # always want this content to be a string for our purposes.
    html_as_text = force_text(html, encoding=encoding)

    # This call invokes Wagtail-specific logic that converts references to
    # Wagtail pages, documents, and images to their proper link URLs.
    expanded_html = expand_db_html(html_as_text)

    soup = BeautifulSoup(expanded_html, 'html.parser')
    link_tags = get_link_tags(soup)
    for tag in link_tags:
        original_link = str(tag)
        link_with_markup = add_link_markup(tag)
        if link_with_markup:
            expanded_html = expanded_html.replace(original_link, link_with_markup)

    return expanded_html
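# Design note on the replace() loop above: it relies on str(tag) reproducing
# the tag exactly as it appears in expanded_html. BeautifulSoup may normalize
# markup (attribute quoting, entities, whitespace) when serializing, in which
# case a tag whose source form differs from its serialized form would not be
# replaced. With 'html.parser' this is usually a non-issue, but worth knowing.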
def test_children(self):
    """ Test children property, given that child information already exists or not """
    self.endpoint.getPassage = mock.MagicMock(return_value=GET_PASSAGE)
    self.endpoint.getPrevNextUrn = mock.MagicMock(return_value=NEXT_PREV)
    self.endpoint.getFirstUrn = mock.MagicMock(return_value=Get_FIRST)
    self.endpoint.getValidReff = mock.MagicMock(return_value=GET_VALID_REFF_1_1)

    passage = Text(
        urn="urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1",
        retriever=self.endpoint
    )
    self.assertEqual(len(list(passage.children)), 6)
    self.assertEqual(
        [str(x.urn) for x in passage.children],
        [
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.1",
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.2",
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.3",
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.4",
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.5",
            "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1.6"
        ],
        "Passage should be retrieved and have the correct URN"
    )
def test_getpassageplus(self, requests):
    text = Text("urn:cts:latinLit:phi1294.phi002.perseus-lat2", self.endpoint)
    requests.return_value.text = GET_PASSAGE_PLUS

    # Test with a reference
    passage = text.getPassagePlus(reference="1.1")
    requests.assert_called_with(
        "http://services.perseids.org/api/cts",
        params={
            "request": "GetPassagePlus",
            "urn": "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1"
        }
    )
    self.assertIsInstance(passage, Passage)
    self.assertEqual(str(passage.urn), "urn:cts:latinLit:phi1294.phi002.perseus-lat2:1.1")
    self.assertEqual(
        passage.xml.findall(".//{http://www.tei-c.org/ns/1.0}l[@n='1']")[0].text,
        "Hic est quem legis ille, quem requiris, "
    )
    self.assertEqual(text.citation.name, "book")
    self.assertEqual(len(text.citation), 3)

    # Test without reference
    text.getPassagePlus()
    requests.assert_called_with(
        "http://services.perseids.org/api/cts",
        params={
            "request": "GetPassagePlus",
            "urn": "urn:cts:latinLit:phi1294.phi002.perseus-lat2"
        }
    )
def _ep_data(self, ep_obj):
    """
    Creates an elementTree XML structure for a MediaBrowser style episode.xml
    and returns the resulting data object.

    show_obj: a Series instance to create the NFO for
    """
    eps_to_write = [ep_obj] + ep_obj.related_episodes
    persons_dict = {u'Director': [], u'GuestStar': [], u'Writer': []}

    my_show = self._get_show_data(ep_obj.series)
    if not my_show:
        return None

    root_node = etree.Element(u'Item')

    # write a MediaBrowser XML containing info for all matching episodes
    for ep_to_write in eps_to_write:
        try:
            my_ep = my_show[ep_to_write.season][ep_to_write.episode]
        except (IndexerEpisodeNotFound, IndexerSeasonNotFound):
            log.info(
                u'Unable to find episode {number} on {indexer}... has it been removed? Should I delete from db?', {
                    u'number': episode_num(ep_to_write.season, ep_to_write.episode),
                    u'indexer': indexerApi(ep_obj.series.indexer).name
                })
            return None

        if ep_to_write == ep_obj:
            # root (or single) episode

            # default to today's date for specials if firstaired is not set
            if ep_to_write.season == 0 and not getattr(my_ep, u'firstaired', None):
                my_ep[u'firstaired'] = str(datetime.date.fromordinal(1))

            if not (getattr(my_ep, u'episodename', None) and getattr(my_ep, u'firstaired', None)):
                return None

            episode = root_node

            if ep_to_write.name:
                episode_name = etree.SubElement(episode, u'EpisodeName')
                episode_name.text = ep_to_write.name

            episode_number = etree.SubElement(episode, u'EpisodeNumber')
            episode_number.text = str(ep_obj.episode)

            if ep_obj.related_episodes:
                episode_number_end = etree.SubElement(episode, u'EpisodeNumberEnd')
                episode_number_end.text = str(ep_to_write.episode)

            season_number = etree.SubElement(episode, u'SeasonNumber')
            season_number.text = str(ep_to_write.season)

            if not ep_obj.related_episodes and getattr(my_ep, u'absolute_number', None):
                absolute_number = etree.SubElement(episode, u'absolute_number')
                absolute_number.text = str(my_ep[u'absolute_number'])

            if ep_to_write.airdate != datetime.date.fromordinal(1):
                first_aired = etree.SubElement(episode, u'FirstAired')
                first_aired.text = str(ep_to_write.airdate)

            metadata_type = etree.SubElement(episode, u'Type')
            metadata_type.text = u'Episode'

            if ep_to_write.description:
                overview = etree.SubElement(episode, u'Overview')
                overview.text = ep_to_write.description

            if not ep_obj.related_episodes:
                if getattr(my_ep, u'rating', None):
                    rating = etree.SubElement(episode, u'Rating')
                    rating.text = str(my_ep[u'rating'])

                if getattr(my_show, u'imdb_id', None):
                    IMDB_ID = etree.SubElement(episode, u'IMDB_ID')
                    IMDB_ID.text = my_show[u'imdb_id']

                    IMDB = etree.SubElement(episode, u'IMDB')
                    IMDB.text = my_show[u'imdb_id']

                    IMDbId = etree.SubElement(episode, u'IMDbId')
                    IMDbId.text = my_show[u'imdb_id']

            indexer_id = etree.SubElement(episode, u'id')
            indexer_id.text = str(ep_to_write.indexerid)

            persons = etree.SubElement(episode, u'Persons')

            if getattr(my_show, u'_actors', None):
                for actor in my_show[u'_actors']:
                    if not (u'name' in actor and actor[u'name'].strip()):
                        continue

                    cur_actor = etree.SubElement(persons, u'Person')

                    cur_actor_name = etree.SubElement(cur_actor, u'Name')
                    cur_actor_name.text = actor[u'name'].strip()

                    cur_actor_type = etree.SubElement(cur_actor, u'Type')
                    cur_actor_type.text = u'Actor'

                    if u'role' in actor and actor[u'role'].strip():
                        cur_actor_role = etree.SubElement(cur_actor, u'Role')
                        cur_actor_role.text = actor[u'role'].strip()

            language = etree.SubElement(episode, u'Language')
            try:
                language.text = my_ep[u'language']
            except Exception:
                # tvrage api doesn't provide language so we must assume a value here
                language.text = app.INDEXER_DEFAULT_LANGUAGE

            thumb = etree.SubElement(episode, u'filename')
            # TODO: See what this is needed for.. if it's still needed
            # just write this to the NFO regardless of whether it actually exists or not
            # note: renaming files after nfo generation will break this, tough luck
            thumb_text = self.get_episode_thumb_path(ep_obj)
            if thumb_text:
                thumb.text = thumb_text
        else:
            # append data from (if any) related episodes
            episode_number_end.text = str(ep_to_write.episode)

            if ep_to_write.name:
                if not episode_name.text:
                    episode_name.text = ep_to_write.name
                else:
                    episode_name.text = u', '.join([episode_name.text, ep_to_write.name])

            if ep_to_write.description:
                if not overview.text:
                    overview.text = ep_to_write.description
                else:
                    overview.text = u'\r'.join([overview.text, ep_to_write.description])

        # collect all directors, guest stars and writers
        if getattr(my_ep, u'director', None):
            persons_dict[u'Director'] += [
                x.strip() for x in my_ep[u'director'].split(u'|') if x.strip()]
        if getattr(my_ep, u'gueststars', None):
            persons_dict[u'GuestStar'] += [
                x.strip() for x in my_ep[u'gueststars'].split(u'|') if x.strip()]
        if getattr(my_ep, u'writer', None):
            persons_dict[u'Writer'] += [
                x.strip() for x in my_ep[u'writer'].split(u'|') if x.strip()]

    # fill in Persons section with collected directors, guest stars and writers
    for person_type, names in iteritems(persons_dict):
        # remove doubles
        names = list(set(names))
        for cur_name in names:
            person = etree.SubElement(persons, u'Person')
            cur_person_name = etree.SubElement(person, u'Name')
            cur_person_name.text = cur_name
            cur_person_type = etree.SubElement(person, u'Type')
            cur_person_type.text = person_type

    # Make it purdy
    helpers.indent_xml(root_node)
    data = etree.ElementTree(root_node)
    return data
def _values_to_encode(data):
    return [(k, str(data[k]).strip().encode('utf8'))
            for k in data if k != 'signature']
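# Illustrative sketch (hypothetical payload): every value except 'signature'
# is stringified, stripped and UTF-8 encoded, in dict insertion order
# (guaranteed on Python 3.7+):
def _demo_values_to_encode():
    assert _values_to_encode({'amount': 10, 'memo': ' hi ', 'signature': 'x'}) == \
        [('amount', b'10'), ('memo', b'hi')]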
def test_movie_fromguess_insufficient_data(movies):
    guess = {'type': 'movie'}
    with pytest.raises(ValueError) as excinfo:
        Movie.fromguess(movies['man_of_steel'].name, guess)
    assert str(excinfo.value) == 'Insufficient data to process the guess'
def test_scan_videos_path_is_not_a_directory(movies, tmpdir, monkeypatch):
    monkeypatch.chdir(str(tmpdir))
    tmpdir.ensure(movies['man_of_steel'].name)
    with pytest.raises(ValueError) as excinfo:
        scan_videos(movies['man_of_steel'].name)
    assert str(excinfo.value) == 'Path is not a directory'
def test_hash_thesubdb_too_small(tmpdir):
    path = tmpdir.ensure('test_too_small.mkv')
    assert hash_thesubdb(str(path)) is None
def _ep_data(self, ep_obj):
    """
    Creates an elementTree XML structure for a MediaBrowser style episode.xml
    and returns the resulting data object.

    show_obj: a Series instance to create the NFO for
    """
    eps_to_write = [ep_obj] + ep_obj.related_episodes

    my_show = self._get_show_data(ep_obj.series)
    if not my_show:
        return None

    root_node = etree.Element('details')
    movie = etree.SubElement(root_node, 'movie')

    movie.attrib['isExtra'] = 'false'
    movie.attrib['isSet'] = 'false'
    movie.attrib['isTV'] = 'true'

    # write a MediaBrowser XML containing info for all matching episodes
    for ep_to_write in eps_to_write:
        try:
            my_ep = my_show[ep_to_write.season][ep_to_write.episode]
        except (IndexerEpisodeNotFound, IndexerSeasonNotFound):
            log.info(
                'Unable to find episode {ep_num} on {indexer}...'
                ' has it been removed? Should I delete from db?', {
                    'ep_num': episode_num(ep_to_write.season, ep_to_write.episode),
                    'indexer': indexerApi(ep_obj.series.indexer).name,
                })
            return None

        if ep_to_write == ep_obj:
            # root (or single) episode

            # default to today's date for specials if firstaired is not set
            if ep_to_write.season == 0 and not getattr(my_ep, 'firstaired', None):
                my_ep['firstaired'] = str(datetime.date.fromordinal(1))

            if not (getattr(my_ep, 'episodename', None) and getattr(my_ep, 'firstaired', None)):
                return None

            episode = movie

            if ep_to_write.name:
                episode_name = etree.SubElement(episode, 'title')
                episode_name.text = ep_to_write.name

            season_number = etree.SubElement(episode, 'season')
            season_number.text = str(ep_to_write.season)

            episode_number = etree.SubElement(episode, 'episode')
            episode_number.text = str(ep_to_write.episode)

            if getattr(my_show, 'firstaired', None):
                try:
                    year_text = str(datetime.datetime.strptime(my_show['firstaired'], dateFormat).year)
                    if year_text:
                        year = etree.SubElement(episode, 'year')
                        year.text = year_text
                except Exception:
                    pass

            if getattr(my_show, 'overview', None):
                plot = etree.SubElement(episode, 'plot')
                plot.text = my_show['overview']

            if ep_to_write.description:
                overview = etree.SubElement(episode, 'episodeplot')
                overview.text = ep_to_write.description

            if getattr(my_show, 'contentrating', None):
                mpaa = etree.SubElement(episode, 'mpaa')
                mpaa.text = my_show['contentrating']

            if not ep_obj.related_episodes and getattr(my_ep, 'rating', None):
                try:
                    rating = int(float(my_ep['rating']) * 10)
                except ValueError:
                    rating = 0

                if rating:
                    # use a separate name for the element; rebinding `rating`
                    # here would make str(rating) serialize the Element itself
                    rating_elem = etree.SubElement(episode, 'rating')
                    rating_elem.text = str(rating)

            if getattr(my_ep, 'director', None):
                director = etree.SubElement(episode, 'director')
                director.text = my_ep['director']

            if getattr(my_ep, 'writer', None):
                writer = etree.SubElement(episode, 'credits')
                writer.text = my_ep['writer']

            if getattr(my_show, '_actors', None) or getattr(my_ep, 'gueststars', None):
                cast = etree.SubElement(episode, 'cast')
                if getattr(my_ep, 'gueststars', None) and isinstance(my_ep['gueststars'], string_types):
                    for actor in (x.strip() for x in my_ep['gueststars'].split('|') if x.strip()):
                        cur_actor = etree.SubElement(cast, 'actor')
                        cur_actor.text = actor

                if getattr(my_show, '_actors', None):
                    for actor in my_show['_actors']:
                        if 'name' in actor and actor['name'].strip():
                            cur_actor = etree.SubElement(cast, 'actor')
                            cur_actor.text = actor['name'].strip()
        else:
            # append data from (if any) related episodes
            if ep_to_write.name:
                if not episode_name.text:
                    episode_name.text = ep_to_write.name
                else:
                    episode_name.text = ', '.join([episode_name.text, ep_to_write.name])

            if ep_to_write.description:
                if not overview.text:
                    overview.text = ep_to_write.description
                else:
                    overview.text = '\r'.join([overview.text, ep_to_write.description])

    # Make it purdy
    helpers.indent_xml(root_node)
    data = etree.ElementTree(root_node)
    return data
def _output(self, fileformat, **keywords):
    """
    Internal function that eases its modification by daughter classes.
    """
    # check for stamp attribute
    keywords["stamp"] = getattr(self, '_stamp', '')

    # add the default parameters, they will be checked against the keywords
    util.setdefaults(
        keywords,
        cols=False,
        distances=False,
        entries=("concept", "counterpart"),
        entry='concept',
        fileformat=fileformat,
        filename=rcParams['filename'],
        formatter='concept',
        modify_ref=False,
        meta=self._meta,
        missing=0,
        prettify='false',
        ignore='all',
        ref='cogid',
        rows=False,
        subset=False,  # setup a subset of the data
        taxa='taxa',
        threshold=0.6,  # threshold for flat clustering
        tree_calc='neighbor')

    if fileformat in ['triple', 'triples', 'triples.tsv']:
        return tsv2triple(self, keywords['filename'] + '.' + fileformat)

    if fileformat in ['paps.nex', 'paps.csv']:
        paps = self.get_paps(
            ref=keywords['ref'], entry=keywords['entry'], missing=keywords['missing'])
        kw = dict(filename=keywords['filename'] + '.paps')
        if fileformat == 'paps.nex':
            kw['missing'] = keywords['missing']
            return pap2nex(self.cols, paps, **kw)
        return pap2csv(self.cols, paps, **kw)

    # simple printing of taxa
    if fileformat == 'taxa':
        assert hasattr(self, 'taxa')
        return util.write_text_file(keywords['filename'] + '.taxa', self.cols)

    # csv-output
    if fileformat in ['csv', 'qlc', 'tsv']:
        # get the header line
        header = sorted(
            [s for s in set(self._alias.values()) if s in self._header],
            key=lambda x: self._header[x])
        header = [h.upper() for h in header]

        self._meta.setdefault('taxa', self.cols)

        # get the data, in case a subset is chosen
        if not keywords['subset']:
            # write stuff to file
            return wl2qlc(header, self._data, **keywords)

        cols, rows = keywords['cols'], keywords['rows']

        if not isinstance(cols, (list, tuple, bool)):
            raise ValueError("[i] Argument 'cols' should be list or tuple.")
        if not isinstance(rows, (dict, bool)):
            raise ValueError("[i] Argument 'rows' should be a dictionary.")

        # check for chosen header
        if cols:
            # get indices for header
            indices = [self._header[x] for x in cols]
            header = [c.upper() for c in cols]
        else:
            indices = [r for r in range(len(self.header))]

        if rows:
            stmts = []
            for key, value in rows.items():
                if key == 'ID':
                    stmts += ["key " + value]
                else:
                    idx = self._header[key]
                    stmts += ["line[{0}] ".format(idx) + value]

        log.debug("calculated what should be excluded")

        # get the data
        out = {}
        for key, line in self._data.items():
            log.debug(key)
            if rows:
                if eval(" and ".join(stmts)):
                    out[key] = [line[i] for i in indices]
            else:
                out[key] = [line[i] for i in indices]

        log.debug("passing data to wl2qlc")
        return wl2qlc(header, out, **keywords)

    # output dst-format (phylip)
    if fileformat == 'dst':
        # check for distances as keyword
        if 'distances' not in self._meta:
            self._meta['distances'] = wl2dst(self, **keywords)

        out = matrix2dst(self._meta['distances'], self.taxa,
                         stamp=keywords['stamp'], taxlen=keywords.get('taxlen', 0))
        return _write_file(keywords['filename'], out, fileformat)

    # output tre-format (newick)
    if fileformat in ['tre', 'nwk']:  # ,'cluster','groups']:
        if 'tree' not in self._meta:
            # check for distances
            if 'distances' not in self._meta:
                self._meta['distances'] = wl2dst(self)
            # we look up a function to calculate a tree in the cluster module:
            tree = getattr(cluster, keywords['tree_calc'])(
                self._meta['distances'], self.cols, distances=keywords['distances'])
        else:
            tree = self._meta['tree']

        return _write_file(keywords['filename'], '{0}'.format(tree), fileformat)

    if fileformat in ['cluster', 'groups']:
        if 'distances' not in self._meta:
            self._meta['distances'] = wl2dst(self)  # check for keywords

        if 'groups' not in self._meta:
            self._meta['groups'] = cluster.matrix2groups(
                keywords['threshold'], self._meta['distances'], self.taxa)

        lines = []
        for taxon, group in sorted(self._meta['groups'].items(), key=lambda x: x[0]):
            lines.append('{0}\t{1}'.format(taxon, group))
        return _write_file(keywords['filename'], lines, fileformat)

    if fileformat in ['starling', 'star.csv']:
        # make lambda inline for data-check
        l = lambda x: ['-' if x == 0 else x][0]

        lines = []
        if 'cognates' not in keywords:
            lines.append('ID\tConcept\t' + '\t'.join(self.taxa))
            for i, concept in enumerate(self.concepts):
                for line in self.get_list(row=concept, entry=keywords['entry']):
                    lines.append(
                        str(i + 1) + '\t' + concept + '\t' + '\t'.join([l(t) for t in line]))
        else:
            lines.append(
                'ID\tConcept\t' + '\t'.join(['{0}\t COG'.format(t) for t in self.taxa]))
            for i, concept in enumerate(self.concepts):
                cogs = self.get_list(row=concept, entry=keywords['cognates'])
                for j, line in enumerate(
                        self.get_list(row=concept, entry=keywords['entry'])):
                    part = '\t'.join(
                        '{0}\t{1}'.format(l(a), b) for a, b in zip(line, cogs[j]))
                    lines.append(util.tabjoin(i + 1, concept, part))

        return _write_file(
            keywords['filename'], lines, 'starling_' + keywords['entry'] + '.csv')

    if fileformat == 'multistate.nex':
        if not keywords['filename'].endswith('.multistate.nex'):
            keywords['filename'] += '.multistate.nex'

        matrix = wl2multistate(self, keywords['ref'], keywords['missing'])
        return multistate2nex(self.taxa, matrix, keywords['filename'])

    if fileformat == 'separated':
        if not os.path.isdir(keywords['filename']):
            os.mkdir(keywords['filename'])

        for l in self.cols:
            lines = [''] if 'ignore_keys' in keywords else ['ID\t']
            lines[0] += '\t'.join(x.upper() for x in keywords['entries'])

            for key in self.get_list(col=l, flat=True):
                line = [] if 'ignore_keys' in keywords else [key]
                for entry in keywords['entries']:
                    tmp = self[key, entry]
                    if isinstance(tmp, list):
                        tmp = ' '.join([str(x) for x in tmp])
                    line += [tmp]
                lines.append('\t'.join('{0}'.format(x) for x in line))
            _write_file('{0}/{1}'.format(keywords['filename'], l), lines, 'tsv')
def __str__(self):
    return ' '.join([str(x) for x in self])
def _show_data(self, show_obj):
    """
    Creates an elementTree XML structure for a MediaBrowser-style series.xml
    returns the resulting data object.

    show_obj: a Series instance to create the NFO for
    """
    my_show = self._get_show_data(show_obj)

    # If for any reason it couldn't get the show's indexer data, let's not go
    # through the rest of this method, as that's pretty useless.
    if not my_show:
        return False

    tv_node = etree.Element(u'Series')

    if getattr(my_show, u'id', None):
        indexerid = etree.SubElement(tv_node, u'id')
        indexerid.text = str(my_show[u'id'])

    if getattr(my_show, u'seriesname', None):
        series_name = etree.SubElement(tv_node, u'SeriesName')
        series_name.text = my_show[u'seriesname']

    if getattr(my_show, u'status', None):
        status = etree.SubElement(tv_node, u'Status')
        status.text = my_show[u'status']

    if getattr(my_show, u'network', None):
        network = etree.SubElement(tv_node, u'Network')
        network.text = my_show[u'network']

    if getattr(my_show, u'airs_time', None):
        airs_time = etree.SubElement(tv_node, u'Airs_Time')
        airs_time.text = my_show[u'airs_time']

    if getattr(my_show, u'airs_dayofweek', None):
        airs_day_of_week = etree.SubElement(tv_node, u'Airs_DayOfWeek')
        airs_day_of_week.text = my_show[u'airs_dayofweek']

    first_aired = etree.SubElement(tv_node, u'FirstAired')
    if getattr(my_show, u'firstaired', None):
        first_aired.text = my_show[u'firstaired']

    if getattr(my_show, u'contentrating', None):
        content_rating = etree.SubElement(tv_node, u'ContentRating')
        content_rating.text = my_show[u'contentrating']

        mpaa = etree.SubElement(tv_node, u'MPAARating')
        mpaa.text = my_show[u'contentrating']

        certification = etree.SubElement(tv_node, u'certification')
        certification.text = my_show[u'contentrating']

    metadata_type = etree.SubElement(tv_node, u'Type')
    metadata_type.text = u'Series'

    if getattr(my_show, u'overview', None):
        overview = etree.SubElement(tv_node, u'Overview')
        overview.text = my_show[u'overview']

    if getattr(my_show, u'firstaired', None):
        premiere_date = etree.SubElement(tv_node, u'PremiereDate')
        premiere_date.text = my_show[u'firstaired']

    if getattr(my_show, u'rating', None):
        rating = etree.SubElement(tv_node, u'Rating')
        rating.text = str(my_show[u'rating'])

    if getattr(my_show, u'firstaired', None):
        try:
            year_text = str(datetime.datetime.strptime(my_show[u'firstaired'], dateFormat).year)
            if year_text:
                production_year = etree.SubElement(tv_node, u'ProductionYear')
                production_year.text = year_text
        except Exception:
            pass

    if getattr(my_show, u'runtime', None):
        running_time = etree.SubElement(tv_node, u'RunningTime')
        running_time.text = str(my_show[u'runtime'])

        runtime = etree.SubElement(tv_node, u'Runtime')
        runtime.text = str(my_show[u'runtime'])

    if getattr(my_show, u'imdb_id', None):
        imdb_id = etree.SubElement(tv_node, u'IMDB_ID')
        imdb_id.text = my_show[u'imdb_id']

        imdb_id = etree.SubElement(tv_node, u'IMDB')
        imdb_id.text = my_show[u'imdb_id']

        imdb_id = etree.SubElement(tv_node, u'IMDbId')
        imdb_id.text = my_show[u'imdb_id']

    if getattr(my_show, u'zap2it_id', None):
        zap2it_id = etree.SubElement(tv_node, u'Zap2ItId')
        zap2it_id.text = my_show[u'zap2it_id']

    if getattr(my_show, u'genre', None) and isinstance(my_show[u'genre'], string_types):
        genres = etree.SubElement(tv_node, u'Genres')
        for genre in my_show[u'genre'].split(u'|'):
            if genre.strip():
                cur_genre = etree.SubElement(genres, u'Genre')
                cur_genre.text = genre.strip()

        genre = etree.SubElement(tv_node, u'Genre')
        genre.text = u'|'.join([x.strip() for x in my_show[u'genre'].split(u'|') if x.strip()])

    if getattr(my_show, u'network', None):
        studios = etree.SubElement(tv_node, u'Studios')
        studio = etree.SubElement(studios, u'Studio')
        studio.text = my_show[u'network']

    if getattr(my_show, u'_actors', None):
        persons = etree.SubElement(tv_node, u'Persons')
        for actor in my_show[u'_actors']:
            if not (u'name' in actor and actor[u'name'].strip()):
                continue

            cur_actor = etree.SubElement(persons, u'Person')

            cur_actor_name = etree.SubElement(cur_actor, u'Name')
            cur_actor_name.text = actor[u'name'].strip()

            cur_actor_type = etree.SubElement(cur_actor, u'Type')
            cur_actor_type.text = u'Actor'

            if u'role' in actor and actor[u'role'].strip():
                cur_actor_role = etree.SubElement(cur_actor, u'Role')
                cur_actor_role.text = actor[u'role'].strip()

    helpers.indent_xml(tv_node)
    data = etree.ElementTree(tv_node)
    return data
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        rows = html('item')
        if not rows:
            log.debug(
                'No results returned from provider. Check chosen Newznab search categories'
                ' in provider settings and/or usenet retention')
            return items

        for item in rows:
            try:
                title = item.title.get_text(strip=True)

                download_url = None
                if item.enclosure:
                    url = item.enclosure.get('url', '').strip()
                    if url:
                        download_url = url
                if not download_url and item.link:
                    url = item.link.get_text(strip=True)
                    if url:
                        download_url = url

                    if not download_url:
                        url = item.link.next.strip()
                        if url:
                            download_url = url
                if not (title and download_url):
                    continue

                if 'gingadaddy' in self.url:
                    size_regex = re.search(r'\d*.?\d* [KMGT]B', str(item.description))
                    item_size = size_regex.group() if size_regex else -1
                else:
                    item_size = item.size.get_text(strip=True) if item.size else -1

                    # Use regex to find name-spaced tags
                    # see BeautifulSoup4 bug 1720605
                    # https://bugs.launchpad.net/beautifulsoup/+bug/1720605
                    newznab_attrs = item(re.compile('newznab:attr'))
                    for attr in newznab_attrs:
                        item_size = attr['value'] if attr['name'] == 'size' else item_size

                size = convert_size(item_size) or -1

                pubdate_raw = item.pubdate.get_text(strip=True)
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    log.debug('Found result: {0}', title)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
def search(self, search_strings, age=0, ep_obj=None, force_query=False, manual_search=False, **kwargs):
    """
    Search a provider and parse the results.

    :param search_strings: A dict with mode (key) and the search value (value)
    :param age: Not used
    :param ep_obj: Not used
    :param force_query: Newznab will by default search using the tvdb/tmdb/imdb id for a show.
        As a backup it can also search using a query string, like the showtitle with the
        season/episode number. The force_query parameter can be passed to force a search
        using the query string.
    :param manual_search: If the search is started through a manual search, we're utilizing
        the force_query param.

    :returns: A list of search results (structure)
    """
    results = []
    if not self._check_auth():
        return results

    # For providers that don't have caps, or for which the t=caps is not working.
    if not self.params and all(
            provider not in self.url for provider in self.providers_without_caps):
        self.get_capabilities(just_params=True)

    # Search Params
    search_params = {
        't': 'search',
        'limit': 100,
        'offset': 0,
        'cat': ','.join(self.cat_ids),
        'maxage': app.USENET_RETENTION
    }

    for mode in search_strings:
        log.debug('Search mode: {0}', mode)

        if self.needs_auth and self.api_key:
            search_params['apikey'] = self.api_key

        if mode != 'RSS':
            match_indexer = self._match_indexer()
            if match_indexer and not force_query:
                search_params['t'] = 'tvsearch'
                search_params.update(match_indexer)

                if ep_obj.series.air_by_date or ep_obj.series.sports:
                    date_str = str(ep_obj.airdate)
                    search_params['season'] = date_str.partition('-')[0]
                    search_params['ep'] = date_str.partition('-')[2].replace('-', '/')
                else:
                    search_params['season'] = ep_obj.scene_season
                    search_params['ep'] = ep_obj.scene_episode
            else:
                search_params['t'] = 'search'

        if mode == 'Season':
            search_params.pop('ep', '')

        for search_string in search_strings[mode]:
            if mode != 'RSS':
                # If its a PROPER search, need to change param to 'search'
                # so it searches using 'q' param
                if any(proper_string in search_string
                       for proper_string in self.proper_strings):
                    search_params['t'] = 'search'

                log.debug(
                    'Search show using {search}', {
                        'search': 'search string: {search_string}'.format(
                            search_string=search_string
                            if search_params['t'] != 'tvsearch'
                            else 'indexer_id: {indexer_id}'.format(indexer_id=match_indexer))
                    })

                if search_params['t'] != 'tvsearch':
                    search_params['q'] = search_string

            response = self.session.get(urljoin(self.url, 'api'), params=search_params)
            if not response or not response.text:
                log.debug('No data returned from provider')
                continue

            results += self.parse(response.text, mode)

            # Since we aren't using the search string,
            # break out of the search string loop
            if any(param in search_params for param in itervalues(INDEXERS_PARAM)):
                break

    # Reprocess but now use force_query = True if there are no results
    if not results and not force_query:
        return self.search(search_strings, ep_obj=ep_obj, force_query=True)

    return results
def tell_sentry(exception, state, allow_reraise=True):
    if isinstance(exception, pando.Response) and exception.code < 500:
        # Only log server errors
        return

    if isinstance(exception, NeedDatabase):
        # Don't flood Sentry when DB is down
        return

    if isinstance(exception, psycopg2.Error):
        from liberapay.website import website
        if getattr(website, 'db', None):
            try:
                website.db.one('SELECT 1 AS x')
            except psycopg2.Error:
                # If it can't answer this simple query, it's down.
                website.db = NoDB()
                # Show the proper 503 error page
                state['exception'] = NeedDatabase()
                # Tell gunicorn to gracefully restart this worker
                os.kill(os.getpid(), signal.SIGTERM)
        if 'read-only' in str(exception):
            # DB is in read only mode
            state['db_is_readonly'] = True
            # Show the proper 503 error page
            state['exception'] = NeedDatabase()
            # Don't reraise this in tests
            allow_reraise = False

    if not sentry:
        # No Sentry, log to stderr instead
        traceback.print_exc()
        # Reraise if allowed
        if env.sentry_reraise and allow_reraise:
            raise
        return

    user = state.get('user')
    extra = {}
    if user is None:
        user_id = 'no user'
    elif user is ANON:
        user_id = 'ANON'
    elif not hasattr(user, 'id'):
        user_id = 'no id'
    else:
        user_id = user.id
        extra['user_url'] = 'https://liberapay.com/~{}/'.format(user_id)

    # Tell Sentry
    tags = {
        'user_id': user_id,
        'username': getattr(user, 'username', None),
    }
    extra['request_line'] = getattr(state.get('request'), 'line', None)
    result = sentry.captureException(tags=tags, extra=extra)

    # Put the Sentry id in the state for logging, etc
    state['sentry_ident'] = sentry.get_ident(result)
def render_content(self, context):
    d = dict((k, v) for k, v in self.__dict__.items() if k[0] != '_')
    return str(json_.dumps(d))
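# Illustration only: underscore-prefixed ("private") attributes are dropped
# before serialization, so an instance whose __dict__ is
# {'title': 'Hi', '_cache': ...} renders as the JSON string '{"title": "Hi"}'.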
def _export(self, fileformat, sections=None, entries=None, entry_sep='',
            item_sep='', template='', exclude=None, entry_start='',
            entry_close='', **keywords):
    """
    Export a wordlist to various file formats.
    """
    if not sections:
        if fileformat == 'txt':
            sections = dict(
                h1=('concept', '\n# Concept: {0}\n'),
                h2=('cogid', '## Cognate-ID: {0}\n'))
        elif fileformat == 'tex':
            sections = dict(
                h1=('concept', r'\section{{Concept: ``{0}"}}' + '\n'),
                h2=('cogid', r'\subsection{{Cognate Set: ``{0}"}}' + '\n'))
        elif fileformat == 'html':
            sections = dict(
                h1=('concept', '<h1>Concept: {0}</h1>'),
                h2=('cogid', '<h2>Cognate Set: {0}</h2>'))

    if not entries:
        if fileformat == 'txt':
            entries = [('language', '{0} '), ('ipa', '{0}\n')]
        elif fileformat == 'tex':
            entries = [('language', '{0} '), ('ipa', '[{0}]' + '\n')]
        elif fileformat == 'html':
            entries = [('language', '{0} '), ('ipa', '[{0}]\n')]

    util.setdefaults(keywords, filename=rcParams['filename'])

    # get the temporary dictionary
    out = wl2dict(self, sections, entries, exclude)

    # assign the output string
    out_string = ''

    # iterate over the dictionary and start to fill the string
    for key in sorted(out, key=lambda x: str(x).lower()):
        # write key to file
        out_string += key[1]

        # reassign tmp
        tmp = out[key]

        # set the pointer and the index
        pointer = {0: [tmp, sorted(tmp.keys())]}

        while True:
            idx = max(pointer.keys())

            # check for type of current point
            if isinstance(tmp, dict):
                if pointer[idx][1]:
                    next_key = pointer[idx][1].pop()
                    out_string += next_key[1]
                    tmp = pointer[idx][0][next_key]
                    if isinstance(tmp, dict):
                        pointer[idx + 1] = [tmp, sorted(tmp.keys())]
                    else:
                        pointer[idx + 1] = [tmp, tmp]
                else:
                    del pointer[idx]
                    if idx == 0:
                        break
            else:
                tmp_strings = []
                for line in sorted(tmp):
                    tmp_strings += [item_sep.join(line)]
                out_string += entry_start + entry_sep.join(tmp_strings) + entry_close

                tmp = pointer[idx - 1][0]
                del pointer[idx]

    if fileformat == 'tex':
        out_string = out_string.replace('_', r'\_')

    tmpl = util.read_text_file(template) if template else '{0}'
    _write_file(keywords['filename'], tmpl.format(out_string), fileformat)
def __init__(self, infile=None, col='list', row='key', conf=None):
    QLCParser.__init__(
        self,
        infile or util.data_path('swadesh', 'swadesh.qlc'),
        conf or util.data_path('conf', 'swadesh.rc'))

    # get row and key index
    if not hasattr(self, '_rowidx'):
        try:
            rowIdx = self.header[self._alias[row]]
            colIdx = self.header[self._alias[col]]
        except:
            raise ValueError(
                "[!] Could not find row and col in configuration or input file!")

        basic_rows = sorted(
            set([self._data[k][rowIdx] for k in self._data
                 if k != 0 and type(k) == int]),
            key=lambda x: ('%s' % x).lower())
        basic_cols = sorted(
            set([self._data[k][colIdx] for k in self._data
                 if k != 0 and type(k) == int]),
            key=lambda x: x.lower())

        # define rows and cols as attributes of the word list
        self.rows = basic_rows
        self.cols = basic_cols

        # define height and width of the word list
        self.height = len(self.rows)
        self.width = len(self.cols)

        # row and column index point to the place where the data of the main
        # items is stored in the original dictionary
        self._rowIdx = rowIdx
        self._colIdx = colIdx
        self._row_name = self._alias[row]
        self._col_name = self._alias[col]

        # create a basic array which assigns ids for the entries in a starling
        # manner.

        # first, find out how many items (== synonyms) there are maximally for
        # each row
        tmp_dict = {}
        for key, value in [(k, v) for k, v in self._data.items()
                           if k != 0 and str(k).isnumeric()]:
            try:
                tmp_dict[value[rowIdx]][value[colIdx]] += [key]
            except KeyError:
                try:
                    tmp_dict[value[rowIdx]][value[colIdx]] = [key]
                except KeyError:
                    tmp_dict[value[rowIdx]] = {}
                    tmp_dict[value[rowIdx]][value[colIdx]] = [key]

        # assign the values as _dict-attribute to the dictionary
        self._dict = tmp_dict

        # create the array by counting the maximal number of occurrences, store
        # the row names separately in a dictionary
        tmp_list = []
        row_dict = {}

        count = 0
        for k, d in self._dict.items():
            row_dict[k] = []

            # get maximal amount of "synonyms"
            m = max([len(x) for x in d.values()])

            for i in range(m):
                tmp = []
                for j in range(self.width):
                    try:
                        tmp.append(d[self.cols[j]][i])
                    except:
                        tmp.append(0)
                row_dict[k] += [count]
                count += 1
                tmp_list += [tmp]

        # create the array
        self._array = np.array(tmp_list)
        self._idx = row_dict

    # add indices to alias dictionary for swadesh lists
    for i, col in enumerate(self.cols):
        self._meta[col] = self._array[np.nonzero(self._array[:, i]), i][0]

    # define a cache dictionary for stored data for quick access
    self._cache = {}
def _show_data(self, show_obj):
    """
    Creates an elementTree XML structure for a MediaBrowser-style series.xml
    returns the resulting data object.

    show_obj: a Series instance to create the NFO for
    """
    my_show = self._get_show_data(show_obj)

    # If for any reason it couldn't get the show's indexer data, let's not go
    # through the rest of this method, as that's pretty useless.
    if not my_show:
        return False

    root_node = etree.Element('details')
    tv_node = etree.SubElement(root_node, 'movie')
    tv_node.attrib['isExtra'] = 'false'
    tv_node.attrib['isSet'] = 'false'
    tv_node.attrib['isTV'] = 'true'

    title = etree.SubElement(tv_node, 'title')
    title.text = my_show['seriesname']

    if getattr(my_show, 'genre', None):
        genres = etree.SubElement(tv_node, 'genres')
        for genre in my_show['genre'].split('|'):
            if genre and genre.strip():
                cur_genre = etree.SubElement(genres, 'Genre')
                cur_genre.text = genre.strip()

    if getattr(my_show, 'firstaired', None):
        first_aired = etree.SubElement(tv_node, 'premiered')
        first_aired.text = my_show['firstaired']
        try:
            year_text = str(datetime.datetime.strptime(my_show['firstaired'], dateFormat).year)
            if year_text:
                year = etree.SubElement(tv_node, 'year')
                year.text = year_text
        except Exception:
            pass

    if getattr(my_show, 'overview', None):
        plot = etree.SubElement(tv_node, 'plot')
        plot.text = my_show['overview']

    if getattr(my_show, 'rating', None):
        try:
            rating = int(float(my_show['rating']) * 10)
        except ValueError:
            rating = 0

        if rating:
            # use a separate name for the element; rebinding `rating` here
            # would make str(rating) serialize the Element itself
            rating_elem = etree.SubElement(tv_node, 'rating')
            rating_elem.text = str(rating)

    if getattr(my_show, 'status', None):
        status = etree.SubElement(tv_node, 'status')
        status.text = my_show['status']

    if getattr(my_show, 'contentrating', None):
        mpaa = etree.SubElement(tv_node, 'mpaa')
        mpaa.text = my_show['contentrating']

    if getattr(my_show, 'imdb_id', None):
        imdb_id = etree.SubElement(tv_node, 'id')
        imdb_id.attrib['moviedb'] = 'imdb'
        imdb_id.text = my_show['imdb_id']

    if getattr(my_show, 'id', None):
        indexer_id = etree.SubElement(tv_node, 'indexerid')
        indexer_id.text = str(my_show['id'])

    if getattr(my_show, 'runtime', None):
        runtime = etree.SubElement(tv_node, 'runtime')
        runtime.text = str(my_show['runtime'])

    if getattr(my_show, '_actors', None):
        cast = etree.SubElement(tv_node, 'cast')
        for actor in my_show['_actors']:
            if 'name' in actor and actor['name'].strip():
                cur_actor = etree.SubElement(cast, 'actor')
                cur_actor.text = actor['name'].strip()

    helpers.indent_xml(root_node)
    data = etree.ElementTree(root_node)
    return data
def __init__(self, filename, conf=''):
    """
    Parse data regularly if the data has not been loaded from a pickled version.
    """
    self.log = log.get_logger()

    # try to load the data
    internal_import = False

    # check whether it's a dictionary from which we load
    if isinstance(filename, dict):
        input_data = filename
        if 'filename' not in input_data:
            self.filename = rcParams['filename']
        internal_import = True

        # make check for correct input, there was a bug with a wrong
        # evaluation which is hopefully fixed by now
        tmp_keys = [k for k in input_data if isinstance(k, int)]
        if len(input_data[0]) != len(input_data[tmp_keys[0]]):
            print(input_data[0], input_data[tmp_keys[0]])
            raise ValueError("[!] Wrong input format!")  # pragma: no cover
    # check whether it's another wordlist-object
    elif hasattr(filename, '_data') and hasattr(filename, '_meta'):
        input_data = dict([(key, [v for v in value])
                           for key, value in filename._data.items()])
        input_data.update(filename._meta.items())
        input_data[0] = [a for a, b in sorted(
            filename.header.items(), key=lambda x: x[1], reverse=False)]
        internal_import = True
        self.filename = rcParams['filename']
    # or whether the data is an actual file
    elif isinstance(filename, string_types) and os.path.isfile(filename):
        input_data = read_qlc(filename)
        self.filename = filename
    # raise an error otherwise
    elif isinstance(filename, string_types):
        raise IOError("Input file '{0}' does not exist.".format(filename))
    else:
        raise TypeError(
            "Unrecognized type for 'filename' argument: {0}".format(
                type(filename).__name__))

    # load the configuration file
    if not conf:
        conf = util.data_path('conf', 'qlc.rc')

    # read the file defined by its path in conf
    tmp = [line.split('\t') for line in util.read_config_file(conf)]

    # define two attributes, _alias, and _class which store the aliases and
    # the datatypes (classes) of the given entries
    self._alias, self._class, self._class_string, self._alias2 = {}, {}, {}, {}
    for name, cls, alias in tmp:
        # make sure the name itself is there
        self._alias[name.lower()] = self._alias[name.upper()] = name
        self._class[name.lower()] = self._class[name.upper()] = eval(cls)
        self._class_string[name.lower()] = self._class_string[name.upper()] = cls

        # add the aliases
        for a in alias.split(','):
            self._alias[a.lower()] = self._alias[a.upper()] = name
            self._class[a.lower()] = self._class[a.upper()] = eval(cls)
            self._class_string[a.lower()] = self._class_string[a.upper()] = cls

        self._alias2[name] = sorted(set(alias.split(','))) + [name]

    # append the names in data[0] to self.conf to make sure that all data
    # is covered, even the types which are not specifically defined in the
    # conf file. the datatype defaults here to "str"
    for name in input_data[0]:
        if name.lower() not in self._alias:
            self._alias[name.lower()] = name.lower()
            self._class[name.lower()] = str
        if name.upper() not in self._alias:
            self._alias[name.upper()] = name.lower()
            self._class[name.upper()] = str

    # add empty alias for empty strings XXX why was that? I can't remember
    # why this was important XXX
    self._alias[''] = ''

    # the header stores the indices of the data in the original data dictionary
    self.header = dict(
        zip([self._alias[x] for x in input_data[0]],
            range(len(input_data[0]))))

    # now create a specific header which has all aliases
    self._header = {k: v for k, v in self.header.items()}

    # add a sorted header for reference
    self.columns = sorted(self.header, key=lambda x: self.header[x])

    # assign all aliases to the header
    for alias in self._alias:
        try:
            self._header[alias] = self._header[self._alias[alias]]
        except:
            pass

    # assign the data as attribute to the word list class. Note that we
    # need to check for the type here, but since numpy also offers integer
    # types, we don't check for type(x) == int, but instead use the
    # str.isnumeric-function that returns numeric values only if it is an
    # integer
    self._data = {
        int(k): v for k, v in input_data.items()
        if k != 0 and str(k).isnumeric()}

    # check for same length of all columns
    check_errors = ''
    for k, v in self._data.items():
        if len(v) != len(self.header):
            check_errors += 'Row {0} in your data contains {1} fields (expected {2})\n'.format(
                k, len(v), len(self.header))
    if check_errors:
        raise ValueError(check_errors + '\n' + ', '.join(sorted(self.header)))

    # iterate over self._data and change the values according to the
    # functions (only needed when reading from file)
    if not internal_import:
        heads = sorted(self._header.items(), key=lambda x: x[1])
        for key in self._data:
            check = []
            for head, i in heads:
                if i not in check:
                    logstring = (
                        'Problem with row {0} in col {1}, expected'
                        ' «{4}» as datatype but received «{3}» '
                        ' (ROW: {2}, entry {5}).')
                    try:
                        self._data[key][i] = self._class[head](self._data[key][i])
                        check.append(i)
                    except (KeyError, ValueError):
                        # the two except branches were identical, so they are
                        # merged here without changing behavior
                        log.warn(logstring.format(
                            key, i,
                            '|'.join([str(x) for x in self._data[key]]),
                            self._data[key][i],
                            self._class[head],
                            head))

    # create entry attribute of the wordlist
    self.entries = sorted(set([b.lower() for a, b in self._alias.items() if b]))

    # assign meta-data
    self._meta = {}
    for key in [k for k in input_data if type(k) != int]:
        self._meta[key] = input_data[key]
def tell_sentry(exception, state, allow_reraise=True):
    if isinstance(exception, pando.Response) and exception.code < 500:
        # Only log server errors
        return

    if isinstance(exception, NeedDatabase):
        # Don't flood Sentry when DB is down
        return

    if isinstance(exception, psycopg2.Error):
        from liberapay.website import website
        if getattr(website, 'db', None):
            try:
                website.db.one('SELECT 1 AS x')
            except psycopg2.Error:
                # If it can't answer this simple query, it's down.
                website.db = NoDB()
                # Show the proper 503 error page
                state['exception'] = NeedDatabase()
                # Tell gunicorn to gracefully restart this worker
                os.kill(os.getpid(), signal.SIGTERM)
        if 'read-only' in str(exception):
            # DB is in read only mode
            state['db_is_readonly'] = True
            # Show the proper 503 error page
            state['exception'] = NeedDatabase()
            # Don't reraise this in tests
            allow_reraise = False

    if isinstance(exception, ValueError):
        if 'cannot contain NUL (0x00) characters' in str(exception):
            # https://github.com/liberapay/liberapay.com/issues/675
            response = state.get('response') or pando.Response()
            response.code = 400
            response.body = str(exception)
            return {'exception': None}

    if not sentry:
        # No Sentry, log to stderr instead
        traceback.print_exc()
        # Reraise if allowed
        if env.sentry_reraise and allow_reraise:
            raise
        return {'sentry_ident': None}

    # Prepare context data
    sentry_data = {}
    if state:
        try:
            sentry_data['tags'] = {
                'lang': getattr(state.get('locale'), 'language', None),
            }
            request = state.get('request')
            user_data = sentry_data['user'] = {}
            if request is not None:
                user_data['ip_address'] = str(request.source)
                sentry_data['request'] = {
                    'method': request.method,
                    'url': request.line.uri,
                    'headers': {
                        k: b', '.join(v)
                        for k, v in request.headers.items()
                        if k != b'Cookie'
                    },
                }
            user = state.get('user')
            if isinstance(user, Participant):
                user_data['id'] = getattr(user, 'id', None)
                user_data['username'] = getattr(user, 'username', None)
        except Exception as e:
            tell_sentry(e, {})

    # Tell Sentry
    result = sentry.captureException(data=sentry_data)

    # Put the Sentry id in the state for logging, etc
    return {'sentry_ident': sentry.get_ident(result)}
def get(self, key): return str(self.source.get(self.sectionkey, key))
def __init__(self, filename, row, col, conf):
    QLCParser.__init__(self, filename, conf=conf)
    try:
        self._row_name = self._alias[row]
        self._col_name = self._alias[col]
        rowIdx = self.header[self._alias[row]]
        colIdx = self.header[self._alias[col]]
    except KeyError:
        raise ValueError(
            "Could not find row or col in configuration or input file!")

    def unique_sorted(idx, key):
        return sorted(set([
            self._data[k][idx] for k in self._data
            if k != 0 and isinstance(k, int)
        ]), key=key)

    # define rows and cols as attributes of the word list
    self.rows = unique_sorted(rowIdx, lambda x: ('%s' % x).lower())
    self.cols = unique_sorted(colIdx, lambda x: x.lower())

    # define height and width of the word list
    self.height = len(self.rows)
    self.width = len(self.cols)

    # row and column index point to the place where the data of the main
    # items is stored in the original dictionary
    self._rowIdx = rowIdx
    self._colIdx = colIdx

    # create a basic array which assigns ids for the entries in a starling
    # manner: first, collect the entries per row and column, so we can
    # find the maximal number of items (== synonyms) for each row
    self._dict = defaultdict(lambda: defaultdict(list))
    for key, value in [(k, v) for k, v in self._data.items()
                       if k != 0 and str(k).isnumeric()]:
        self._dict[value[rowIdx]][value[colIdx]].append(key)

    # We must cast to a regular dict to make the attribute picklable.
    self._dict = dict(self._dict)

    # create the array by counting the maximal number of occurrences, store
    # the row names separately in a dictionary
    tmp_list = []
    self._idx = {}
    count = 0
    for k, d in self._dict.items():
        self._idx[k] = []

        # iterate over the maximal number of "synonyms" per row
        for i in range(max([len(x) for x in d.values()])):
            tmp = []
            for j in range(self.width):
                try:
                    tmp.append(d[self.cols[j]][i])
                except (KeyError, IndexError):
                    # no entry for this column, or fewer synonyms than
                    # the widest column: pad the cell with 0
                    tmp.append(0)
            self._idx[k] += [count]
            count += 1
            tmp_list += [tmp]

    self._array = np.array(tmp_list)
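def _array_layout_example():
    # A minimal sketch (hypothetical data, not part of the class above) of
    # the array layout built in __init__: here one row-concept has two
    # synonyms in the 'German' column (data keys 1 and 2) and one in the
    # 'English' column (data key 3), so two array rows are emitted and the
    # empty cell is padded with 0.
    import numpy as np
    d = {'German': [1, 2], 'English': [3]}
    cols = ['English', 'German']
    tmp_list = []
    for i in range(max(len(x) for x in d.values())):
        tmp_list.append([d[c][i] if i < len(d[c]) else 0 for c in cols])
    return np.array(tmp_list)  # -> [[3, 1], [0, 2]]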
def _get_test_name(string): """Returns test name for resource.""" return TEST_NAME_PREFIX + str(string)
def parse(self, data, mode):
    """
    Parse search results for items.

    :param data: The raw response from a search
    :param mode: The current mode used to search, e.g. RSS

    :return: A list of items found
    """
    items = []

    with BS4Parser(data, 'html5lib') as html:
        rows = html('item')
        if not rows:
            log.debug(
                'No results returned from provider. Check chosen Newznab search categories'
                ' in provider settings and/or usenet retention')
            return items

        try:
            self.torznab = 'xmlns:torznab' in html.rss.attrs
        except AttributeError:
            self.torznab = False

        for item in rows:
            try:
                title = item.title.get_text(strip=True)
                download_url = None
                if item.enclosure:
                    url = item.enclosure.get('url', '').strip()
                    if url.startswith('magnet:'):
                        download_url = url
                    elif validators.url(url):
                        download_url = url
                        # Jackett needs extension added (since v0.8.396)
                        if not url.endswith('.torrent'):
                            content_type = item.enclosure.get('type', '')
                            if content_type == 'application/x-bittorrent':
                                download_url = '{0}{1}'.format(url, '.torrent')

                if not download_url and item.link:
                    url = item.link.get_text(strip=True)
                    if validators.url(url) or url.startswith('magnet:'):
                        download_url = url

                    if not download_url:
                        url = item.link.next.strip()
                        if validators.url(url) or url.startswith('magnet:'):
                            download_url = url

                if not (title and download_url):
                    continue

                seeders = leechers = -1
                if 'gingadaddy' in self.url:
                    size_regex = re.search(r'\d*.?\d* [KMGT]B', str(item.description))
                    item_size = size_regex.group() if size_regex else -1
                else:
                    item_size = item.size.get_text(strip=True) if item.size else -1

                    # Use regex to find name-spaced tags
                    # see BeautifulSoup4 bug 1720605
                    # https://bugs.launchpad.net/beautifulsoup/+bug/1720605
                    newznab_attrs = item(re.compile('newznab:attr'))
                    torznab_attrs = item(re.compile('torznab:attr'))
                    for attr in newznab_attrs + torznab_attrs:
                        if attr['name'] == 'size':
                            item_size = attr['value']
                        elif attr['name'] == 'seeders':
                            seeders = try_int(attr['value'])
                        elif attr['name'] == 'peers':
                            peers = try_int(attr['value'])
                            if peers:
                                leechers = peers - seeders

                if not item_size or (self.torznab and (seeders == -1 or leechers == -1)):
                    continue

                size = convert_size(item_size) or -1

                pubdate_raw = item.pubdate.get_text(strip=True)
                pubdate = self.parse_pubdate(pubdate_raw)

                item = {
                    'title': title,
                    'link': download_url,
                    'size': size,
                    'seeders': seeders,
                    'leechers': leechers,
                    'pubdate': pubdate,
                }
                if mode != 'RSS':
                    if seeders == -1:
                        log.debug('Found result: {0}', title)
                    else:
                        log.debug(
                            'Found result: {0} with {1} seeders and {2} leechers',
                            title, seeders, leechers)

                items.append(item)
            except (AttributeError, TypeError, KeyError, ValueError, IndexError):
                log.exception('Failed parsing provider.')

    return items
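# A rough sketch (hypothetical values) of the kind of RSS item the parser
# above consumes; the namespaced attr elements follow the newznab/torznab
# conventions matched by the loop over newznab_attrs and torznab_attrs:
SAMPLE_NEWZNAB_ITEM = """
<item>
  <title>Some.Show.S01E01.720p</title>
  <link>https://indexer.example/get/abc123</link>
  <pubDate>Mon, 02 Jan 2017 10:00:00 +0000</pubDate>
  <enclosure url="https://indexer.example/get/abc123" type="application/x-nzb" />
  <newznab:attr name="size" value="1073741824" />
</item>
"""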
def test_video_fromguess_wrong_type(episodes):
    guess = {'type': 'subtitle'}
    with pytest.raises(ValueError) as excinfo:
        Video.fromguess(episodes['bbt_s07e05'].name, guess)
    assert str(excinfo.value) == 'The guess must be an episode or a movie guess'
def _ep_data(self, ep_obj):
    """
    Create an elementTree XML structure for a WDTV-style episode.xml
    and return the resulting data object.

    ep_obj: an Episode instance to create the episode XML for
    """
    eps_to_write = [ep_obj] + ep_obj.related_episodes

    my_show = self._get_show_data(ep_obj.series)
    if not my_show:
        return None

    root_node = etree.Element('details')

    # write a WDTV XML containing info for all matching episodes
    for ep_to_write in eps_to_write:

        try:
            my_ep = my_show[ep_to_write.season][ep_to_write.episode]
        except (IndexerEpisodeNotFound, IndexerSeasonNotFound):
            log.info(
                'Unable to find episode {number} on {indexer}... has it been removed? Should I delete from db?', {
                    'number': ep_num(ep_to_write.season, ep_to_write.episode),
                    'indexer': indexerApi(ep_obj.series.indexer).name,
                }
            )
            return None

        if ep_obj.season == 0 and not getattr(my_ep, 'firstaired', None):
            my_ep['firstaired'] = str(datetime.date.fromordinal(1))

        if not (getattr(my_ep, 'episodename', None) and getattr(my_ep, 'firstaired', None)):
            return None

        if len(eps_to_write) > 1:
            episode = etree.SubElement(root_node, 'details')
        else:
            episode = root_node

        # TODO: get right EpisodeID
        episode_id = etree.SubElement(episode, 'id')
        episode_id.text = str(ep_to_write.indexerid)

        title = etree.SubElement(episode, 'title')
        title.text = ep_obj.pretty_name()

        if getattr(my_show, 'seriesname', None):
            series_name = etree.SubElement(episode, 'series_name')
            series_name.text = my_show['seriesname']

        if ep_to_write.name:
            episode_name = etree.SubElement(episode, 'episode_name')
            episode_name.text = ep_to_write.name

        season_number = etree.SubElement(episode, 'season_number')
        season_number.text = str(ep_to_write.season)

        episode_num = etree.SubElement(episode, 'episode_number')
        episode_num.text = str(ep_to_write.episode)

        first_aired = etree.SubElement(episode, 'firstaired')

        if ep_to_write.airdate != datetime.date.fromordinal(1):
            first_aired.text = str(ep_to_write.airdate)

        if getattr(my_show, 'firstaired', None):
            try:
                year_text = str(datetime.datetime.strptime(my_show['firstaired'], dateFormat).year)
                if year_text:
                    year = etree.SubElement(episode, 'year')
                    year.text = year_text
            except Exception:
                pass

        if ep_to_write.season != 0 and getattr(my_show, 'runtime', None):
            runtime = etree.SubElement(episode, 'runtime')
            runtime.text = str(my_show['runtime'])

        if getattr(my_show, 'genre', None):
            genre = etree.SubElement(episode, 'genre')
            genre.text = ' / '.join([x.strip() for x in my_show['genre'].split('|') if x.strip()])

        if getattr(my_ep, 'director', None):
            director = etree.SubElement(episode, 'director')
            director.text = my_ep['director']

        if getattr(my_show, '_actors', None):
            for actor in my_show['_actors']:
                if not ('name' in actor and actor['name'].strip()):
                    continue

                cur_actor = etree.SubElement(episode, 'actor')

                cur_actor_name = etree.SubElement(cur_actor, 'name')
                cur_actor_name.text = actor['name']

                if 'role' in actor and actor['role'].strip():
                    cur_actor_role = etree.SubElement(cur_actor, 'role')
                    cur_actor_role.text = actor['role'].strip()

        if ep_to_write.description:
            overview = etree.SubElement(episode, 'overview')
            overview.text = ep_to_write.description

    # Make it purdy
    helpers.indent_xml(root_node)

    data = etree.ElementTree(root_node)

    return data
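# A rough sketch (hypothetical values) of the WDTV episode.xml layout that
# _ep_data above produces for a single episode:
WDTV_EPISODE_XML_SHAPE = """
<details>
  <id>12345</id>
  <title>Show - 1x01 - Pilot</title>
  <series_name>Show</series_name>
  <episode_name>Pilot</episode_name>
  <season_number>1</season_number>
  <episode_number>1</episode_number>
  <firstaired>2017-01-02</firstaired>
  <overview>...</overview>
</details>
"""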
def test_hash_opensubtitles_too_small(tmpdir): path = tmpdir.ensure('test_too_small.mkv') assert hash_opensubtitles(str(path)) is None
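def _opensubtitles_hash_sketch(video_path):
    # A minimal sketch of the hash that hash_opensubtitles computes, per
    # the OpenSubtitles spec (not the library's own implementation): the
    # file size plus the little-endian int64 sum of the first and last
    # 64 KiB, truncated to 64 bits. Files under 128 KiB have no defined
    # hash, which is why the empty file in the test above yields None.
    import os
    import struct
    chunk = 65536
    filesize = os.path.getsize(video_path)
    if filesize < chunk * 2:
        return None
    filehash = filesize
    with open(video_path, 'rb') as f:
        for _ in range(chunk // 8):
            filehash += struct.unpack('<q', f.read(8))[0]
        f.seek(filesize - chunk, os.SEEK_SET)
        for _ in range(chunk // 8):
            filehash += struct.unpack('<q', f.read(8))[0]
    return '%016x' % (filehash & 0xFFFFFFFFFFFFFFFF)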
def __add__(self, other): return lists(str(self) + self.sep + str(other))
def test_scan_videos_path_does_not_exist(movies): with pytest.raises(ValueError) as excinfo: scan_videos(movies['man_of_steel'].name) assert str(excinfo.value) == 'Path does not exist'
def __add__(self, other): return _strings(self._type, str(self) + ' ' + str(_strings(self._type, other)))
def __repr__(self): return '%s(%r, params=%r)' % (self.__class__.__name__, str(self), self.params)
def __init__(self, *args, **kwargs):
    get_first_name = getattr(settings, 'PAYFAST_GET_USER_FIRST_NAME',
                             attrgetter('first_name'))
    get_last_name = getattr(settings, 'PAYFAST_GET_USER_LAST_NAME',
                            attrgetter('last_name'))

    user = kwargs.pop('user', None)
    # Guard against a missing 'initial' kwarg, so the setdefault calls
    # below cannot raise a KeyError.
    kwargs.setdefault('initial', {})
    if user:
        if get_first_name is not None:
            kwargs['initial'].setdefault('name_first', get_first_name(user))
        if get_last_name is not None:
            kwargs['initial'].setdefault('name_last', get_last_name(user))
        # Django 1.11 adds AbstractBaseUser.get_email_field_name()
        email_address = (user.email if django.VERSION < (1, 11) else
                         getattr(user, get_user_model().get_email_field_name()))
        kwargs['initial'].setdefault('email_address', email_address)

    kwargs['initial'].setdefault('notify_url', notify_url())
    kwargs['initial'].setdefault('merchant_id', conf.MERCHANT_ID)
    kwargs['initial'].setdefault('merchant_key', conf.MERCHANT_KEY)

    super(PayFastForm, self).__init__(*args, **kwargs)

    if 'm_payment_id' in self.initial:
        # If the caller supplies m_payment_id, find the existing order, or create it.
        (self.order, created) = PayFastOrder.objects.get_or_create(
            m_payment_id=self.initial['m_payment_id'],
            defaults=dict(
                user=user,
                amount_gross=self.initial['amount'],
            ),
        )
        if not created:
            # If the order already exists, check the user and amount
            # fields, and update them if necessary.
            #
            # XXX: Also consistency-check that the order is not paid yet?
            #
            if not (self.order.user == user and
                    self.order.amount_gross == self.initial['amount']):
                self.order.user = user
                self.order.amount_gross = self.initial['amount']
                self.order.save()
    else:
        # Old path: Create a new PayFastOrder each time the form is instantiated.
        self.order = PayFastOrder.objects.create(
            user=user,
            amount_gross=self.initial['amount'],
        )

        # Initialise m_payment_id from the pk.
        self.order.m_payment_id = str(self.order.pk)
        self.order.save()
        self.initial['m_payment_id'] = self.order.m_payment_id

    # Coerce values to strings, for signing.
    data = {k: str(v) for (k, v) in self.initial.items()}
    self._signature = self.fields['signature'].initial = api.checkout_signature(data)
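def payfast_form_example(request):
    # A minimal usage sketch (hypothetical view code, not part of the
    # module): 'amount' and 'item_name' are standard PayFast fields;
    # passing m_payment_id in initial would instead make the form reuse
    # an existing PayFastOrder rather than create a new one.
    form = PayFastForm(
        initial={'amount': '100.00', 'item_name': 'Test order'},
        user=request.user,
    )
    return form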