def discoverNamespace(namespace):
    """Discover a namespace: validate it, then list its members.

    Raises ResourceNotFoundException when the wikified namespace is neither
    the empty string nor one of the known top-level members.
    """
    # Compute the wikified name once instead of three separate calls.
    wikiNS = wikifyNamespace(namespace)
    if wikiNS not in DumpdataProvider.getMembers("") and wikiNS != "":
        raise ResourceNotFoundException()
    response = {
        "classifier": "Namespace",
        "name": namespace,
        "members": [],
    }  # , 'github': DumpdataProvider.getGithub(namespace, '')}

    # gather wiki data
    setWikidata(response, "Namespace", wikiNS)
    response["namespace"] = "Namespace"

    # gather member data: one resource link per member, spaces URL-normalized
    # to underscores
    for member in DumpdataProvider.getMembers(namespace):
        response["members"].append(
            {
                "resource": os.path.join(base_uri, namespace, member).replace(" ", "_"),
                "classifier": "Namespace member",
                "name": member,
            }
        )
    return response
def discoverNamespace(namespace):
    """Resolve *namespace* to a payload describing its members.

    Unknown namespaces (wikified name neither known nor empty) raise
    ResourceNotFoundException.
    """
    if wikifyNamespace(namespace) not in DumpdataProvider.getMembers('') \
            and wikifyNamespace(namespace) != '':
        raise ResourceNotFoundException()

    response = {'classifier': 'Namespace', 'name': namespace, 'members': []}

    # wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, 'Namespace', wikiNS)
    response['namespace'] = 'Namespace'

    # member data: build the resource links in one pass
    response['members'] = [
        {
            'resource': os.path.join(base_uri, namespace, m).replace(' ', '_'),
            'classifier': 'Namespace member',
            'name': m,
        }
        for m in DumpdataProvider.getMembers(namespace)
    ]
    return response
def discoverNamespaceMember(namespace, member):
    """Describe one member of *namespace*: wiki data plus its top-level listing."""
    if member.decode("utf_8") not in DumpdataProvider.getMembers(namespace):
        raise ResourceNotFoundException()

    response = {
        "folders": [],
        "files": [],
        "classifier": "Namespace member",
        "name": member,
        "github": DumpdataProvider.getGithub(namespace, member),
    }

    # wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)
    response["namespace"] = wikiNS

    # directory listing; member names use underscores instead of spaces here
    dirPath = os.path.join(namespace, member.replace(" ", "_"))
    files, dirs = DumpdataProvider.getDirContent(dirPath)
    response["folders"] = [
        {"resource": os.path.join(base_uri, dirPath, d), "classifier": "Folder", "name": d}
        for d in dirs
    ]
    response["files"] = [
        {"resource": os.path.join(base_uri, dirPath, f), "classifier": "File", "name": f}
        for f in files
    ]

    if namespace == "modules":
        response["module"] = DumpdataProvider.getModuleDescription(member)
    return response
def discoverMemberPath(namespace, member, path):
    """Describe a folder *path* inside a namespace member."""
    github = DumpdataProvider.getGithub(namespace, member)
    response = {
        "folders": [],
        "files": [],
        "classifier": "Folder",
        "name": os.path.basename(path),
        "github": github,
    }
    # extend the github link down to this folder, when one exists
    if github:
        response["github"] = os.path.join(github, path)

    # wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)
    response["namespace"] = wikiNS

    # directory listing
    dirPath = os.path.join(namespace, member, path)
    files, dirs = DumpdataProvider.getDirContent(dirPath)
    for entry in dirs:
        response["folders"].append(
            {"resource": os.path.join(base_uri, dirPath, entry), "classifier": "Folder", "name": entry}
        )
    for entry in files:
        response["files"].append(
            {"resource": os.path.join(base_uri, dirPath, entry), "classifier": "File", "name": entry}
        )
    return response
def query_and_analyze_entity(entity):
    """Fetch *entity.resource* via the test client and validate the response.

    Returns a 4-tuple ``(entity, parsed_response, time_taken, error)`` where
    exactly one of ``parsed_response`` / ``error`` is non-None.  ``error`` is a
    ``(name, message)`` pair; ``time_taken`` is -1 when the request was never
    issued (duplicate resource URL).
    """
    # A lock is a context manager: `with` replaces the manual
    # acquire/try/finally/release dance and cannot leak the lock.
    with assigned_resources_mutex:
        if entity.resource in assigned_resources:
            # URLs need to be unique for self.entity_errors
            entity.resource += " (duplicate-" + str(uuid.uuid1()) + ")"
            return entity, None, -1, (
                "ResourceAlreadyAssignedError",
                "Resource url was already assigned to another "
                "entity.")
        assigned_resources.append(entity.resource)

    time_before = time.time()
    try:
        # NOTE(review): this call still logs a traceback even though the
        # exception is caught here — presumably Django logging; TODO confirm
        # and silence for the test run.
        response = client.get(entity.resource,
                              {'format': 'json', 'validate': 'true'})
    except Exception as e:
        time_taken = time.time() - time_before
        return entity, None, time_taken, (type(e).__name__, str(e))
    time_taken = time.time() - time_before

    if response.status_code != 200:
        return entity, None, time_taken, (
            "ResponseStatusNotOkError",
            "Did not return status ok (200) but instead '{}'."
            .format(response.status_code))

    parsed_response = json.loads(response.content)
    parsed_classifier = parsed_response['classifier']
    parsed_name = parsed_response['name']
    # Namespace entities report their wikified name; normalize before comparing.
    wikified_name = wikifyNamespace(parsed_name)
    if wikified_name and wikified_name != 'None':
        parsed_name = wikified_name

    if entity.classifier != parsed_classifier:
        # fixed message: the original was missing the closing quote after '{}'
        return entity, None, time_taken, (
            "WrongClassifierError",
            "Did not have expected classifier '{}' but instead '{}'."
            .format(entity.classifier, parsed_classifier))
    if entity.name != parsed_name:
        return entity, None, time_taken, (
            "WrongNameError",
            "Did not have expected name '{}' but instead '{}'."
            .format(entity.name, parsed_name))
    return entity, parsed_response, time_taken, None
def query_and_analyze_entity(entity):
    """Request an entity's resource and check classifier/name expectations.

    Yields ``(entity, parsed, elapsed, error)``; ``error`` is None on success.
    """
    assigned_resources_mutex.acquire()  # MUTEX ACQUIRE
    try:
        duplicate = entity.resource in assigned_resources
        if duplicate:
            # URLs need to be unique for self.entity_errors
            entity.resource += " (duplicate-" + str(uuid.uuid1()) + ")"
        else:
            assigned_resources.append(entity.resource)
    finally:
        assigned_resources_mutex.release()  # MUTEX RELEASE
    if duplicate:
        return (entity, None, -1,
                ("ResourceAlreadyAssignedError",
                 "Resource url was already assigned to another "
                 "entity."))

    started = time.time()
    try:
        # For some reason this call still produces a traceback if we catch the
        # exception further down — seems like Django logging; how to suppress
        # that for the test?
        response = client.get(entity.resource,
                              {'format': 'json', 'validate': 'true'})
    except Exception as e:
        elapsed = time.time() - started
        return entity, None, elapsed, (type(e).__name__, str(e))
    elapsed = time.time() - started

    if response.status_code != 200:
        reason = ("ResponseStatusNotOkError",
                  "Did not return status ok (200) but instead '{}'."
                  .format(response.status_code))
        return entity, None, elapsed, reason

    parsed = json.loads(response.content)
    got_classifier = parsed['classifier']
    got_name = parsed['name']
    wikified = wikifyNamespace(got_name)
    if wikified and wikified != 'None':
        got_name = wikified

    if got_classifier != entity.classifier:
        reason = ("WrongClassifierError",
                  "Did not have expected classifier '{} but instead '{}'."
                  .format(entity.classifier, got_classifier))
        return entity, None, elapsed, reason
    if got_name != entity.name:
        reason = ("WrongNameError",
                  "Did not have expected name '{}' but instead '{}'."
                  .format(entity.name, got_name))
        return entity, None, elapsed, reason
    return entity, parsed, elapsed, None
def discoverMemberFile(namespace, member, path, file):
    """Describe a file: metadata, extracted fragments, content, commit info.

    Raises ResourceNotFoundException when the file does not exist in the dump.
    """
    filePath = os.path.join(namespace, member, path, file)
    if not DumpdataProvider.exists(filePath):
        raise ResourceNotFoundException()

    # if no geshi code is defined, then we'll return basically "geshi : null"
    # and nothing else
    locator, extractor, geshi, language = DumpdataProvider.getMetadata(filePath)
    response = {
        "geshi": geshi,
        "fragments": [],
        "classifier": "File",
        "name": file,
        "github": DumpdataProvider.getGithub(namespace, member),
    }
    if language:
        response["language"] = language

    # update github data
    if response["github"]:
        response["github"] = os.path.join(response["github"], path, file)

    # gather wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)
    response["namespace"] = wikiNS

    # gather member data - if there is a fact extractor, then we also want to
    # give back selectable fragments.  (Leftover debug prints removed; they
    # were also Python 3-incompatible.)
    try:
        extractedFacts = DumpdataProvider.getFacts(filePath, extractor)
        for fragment in extractedFacts.get("fragments", []):
            fragmentPath = os.path.join(fragment["classifier"], fragment["name"])
            response["fragments"].append(mapFragment(filePath, fragmentPath, fragment))
    except OSError:
        # best effort: no facts available for this file
        pass

    # gather content - if there is a geshi code, we should be able to get content
    if geshi:
        response["content"] = DumpdataProvider.read(filePath)

    # commit infos
    setCommitInfos(response, filePath)
    response["derived"] = DumpdataProvider.getDerivedFiles(filePath)
    return response
def discoverMemberFile(namespace, member, path, file):
    """Build the discovery payload for a single file of a namespace member."""
    filePath = os.path.join(namespace, member, path, file)
    if not DumpdataProvider.exists(filePath):
        raise ResourceNotFoundException()

    # Without a geshi code the payload is essentially "geshi: null" and little else.
    locator, extractor, geshi, language = DumpdataProvider.getMetadata(filePath)
    github = DumpdataProvider.getGithub(namespace, member)
    response = {
        'geshi': geshi,
        'fragments': [],
        'classifier': 'File',
        'name': file,
        'github': github,
    }
    if language:
        response['language'] = language
    if github:
        # point the github link at this very file
        response['github'] = os.path.join(github, path, file)

    # wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)
    response['namespace'] = wikiNS

    # A fact extractor (when present) yields selectable fragments.
    try:
        facts = DumpdataProvider.getFacts(filePath, extractor)
        for frag in facts.get('fragments', []):
            fragPath = os.path.join(frag['classifier'], frag['name'])
            response['fragments'].append(mapFragment(filePath, fragPath, frag))
    except OSError:
        pass

    # A geshi code implies readable content.
    if geshi:
        response['content'] = DumpdataProvider.read(filePath)

    setCommitInfos(response, filePath)
    response['derived'] = DumpdataProvider.getDerivedFiles(filePath)
    return response
def discoverNamespaceMember(namespace, member):
    """Return wiki data and the top-level directory listing for *member*."""
    if member.decode('utf_8') not in DumpdataProvider.getMembers(namespace):
        raise ResourceNotFoundException()

    response = {
        'folders': [],
        'files': [],
        'classifier': 'Namespace member',
        'name': member,
        'github': DumpdataProvider.getGithub(namespace, member),
    }

    # wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)
    response['namespace'] = wikiNS

    # directory listing: fill both buckets with the same record shape
    dirPath = os.path.join(namespace, member.replace(' ', '_'))
    files, dirs = DumpdataProvider.getDirContent(dirPath)
    for bucket, classifier, names in (
            ('folders', 'Folder', dirs),
            ('files', 'File', files)):
        for entry in names:
            response[bucket].append({
                'resource': os.path.join(base_uri, dirPath, entry),
                'classifier': classifier,
                'name': entry,
            })

    if namespace == 'modules':
        response['module'] = DumpdataProvider.getModuleDescription(member)
    return response
def discoverMemberPath(namespace, member, path):
    """Return the folder listing for *path* under a namespace member."""
    response = {
        'folders': [],
        'files': [],
        'classifier': 'Folder',
        'name': os.path.basename(path),
        'github': DumpdataProvider.getGithub(namespace, member),
    }
    # deepen the github link to this folder when the member has one
    if response['github']:
        response['github'] = os.path.join(response['github'], path)

    # wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)
    response['namespace'] = wikiNS

    # directory listing
    dirPath = os.path.join(namespace, member, path)
    files, dirs = DumpdataProvider.getDirContent(dirPath)
    response['folders'] = [
        {'resource': os.path.join(base_uri, dirPath, d),
         'classifier': 'Folder',
         'name': d}
        for d in dirs
    ]
    response['files'] = [
        {'resource': os.path.join(base_uri, dirPath, f),
         'classifier': 'File',
         'name': f}
        for f in files
    ]
    return response
def discoverFileFragment(namespace, member, path, file, fragment):
    """Locate *fragment* inside a file and return its metadata and content.

    Raises ResourceNotFoundException when the file does not exist.
    """
    filePath = os.path.join(namespace, member, path, file)
    if not DumpdataProvider.exists(filePath):
        raise ResourceNotFoundException()

    # remove trailing slash, if there is one
    if fragment.endswith("/"):
        fragment = fragment[:-1]

    # if no geshi code is defined, then we'll return basically "geshi : null"
    locator, extractor, geshi, language = DumpdataProvider.getMetadata(filePath)

    # name and classifier are set later (in the extractor phase)
    response = {"geshi": geshi, "fragments": [], "github": DumpdataProvider.getGithub(namespace, member)}
    if language:
        response["language"] = language

    # update github data
    if response["github"]:
        response["github"] = os.path.join(response["github"], path, file)

    # gather wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)
    response["namespace"] = wikiNS

    # gather member data: walk the extracted facts to find the selected fragment.
    # (Leftover debug prints removed; they were also Python 3-incompatible.)
    lineNumbers = None
    try:
        extractedFacts = DumpdataProvider.getFacts(filePath, extractor)
        # TODO There has to be a better way to do this
        for f1 in extractedFacts["fragments"]:
            selected, fragmentPath = find(f1, fragment)
            if selected:
                response["classifier"] = selected["classifier"]
                response["name"] = selected["name"]
                if "startLine" in selected:
                    lineNumbers = {"from": selected["startLine"], "to": selected["endLine"]}
                for f2 in selected.get("fragments", []):
                    response["fragments"].append(mapFragment(filePath, fragmentPath, f2))
                break
    except Exception:
        # was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; Exception keeps the best-effort behavior without that.
        pass

    # gather content
    if lineNumbers or locator:
        if not lineNumbers:
            lineNumbers = DumpdataProvider.getFragment(filePath, fragment, locator)
        fragmentText = DumpdataProvider.read(filePath, range(lineNumbers["from"] - 1, lineNumbers["to"]))
        response["content"] = fragmentText
        if response["github"]:
            response["github"] += "#L{0}-{1}".format(lineNumbers["from"], lineNumbers["to"])

    setCommitInfos(response, filePath)
    return response
def discoverFileFragment(namespace, member, path, file, fragment):
    """Locate *fragment* inside a file and return its metadata and content.

    Raises ResourceNotFoundException when the file does not exist.
    """
    filePath = os.path.join(namespace, member, path, file)
    if not DumpdataProvider.exists(filePath):
        raise ResourceNotFoundException()

    # remove trailing slash, if there is one
    if fragment.endswith('/'):
        fragment = fragment[:-1]

    # if no geshi code is defined, then we'll return basically "geshi : null"
    locator, extractor, geshi, language = DumpdataProvider.getMetadata(filePath)

    # name and classifier are set later (in the extractor phase)
    response = {
        'geshi': geshi,
        'fragments': [],
        'github': DumpdataProvider.getGithub(namespace, member),
    }
    if language:
        response['language'] = language

    # update github data
    if response['github']:
        response['github'] = os.path.join(response['github'], path, file)

    # gather wiki data
    wikiNS = wikifyNamespace(namespace)
    setWikidata(response, wikiNS, member)
    response['namespace'] = wikiNS

    # gather member data: walk the extracted facts to find the selected fragment.
    # (Debug prints removed; they were also Python 3-incompatible.)
    lineNumbers = None
    try:
        extractedFacts = DumpdataProvider.getFacts(filePath, extractor)
        # TODO There has to be a better way to do this
        for f1 in extractedFacts['fragments']:
            selected, fragmentPath = find(f1, fragment)
            if selected:
                response['classifier'] = selected['classifier']
                response['name'] = selected['name']
                if 'startLine' in selected:
                    lineNumbers = {'from': selected['startLine'], 'to': selected['endLine']}
                for f2 in selected.get('fragments', []):
                    response['fragments'].append(mapFragment(filePath, fragmentPath, f2))
                break
    except Exception:
        # was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; Exception keeps the best-effort behavior without that.
        pass

    # gather content
    if lineNumbers or locator:
        if not lineNumbers:
            lineNumbers = DumpdataProvider.getFragment(filePath, fragment, locator)
        fragmentText = DumpdataProvider.read(filePath, range(lineNumbers['from'] - 1, lineNumbers['to']))
        response['content'] = fragmentText
        if response['github']:
            response['github'] += '#L{0}-{1}'.format(lineNumbers['from'], lineNumbers['to'])

    setCommitInfos(response, filePath)
    return response